/*
===========================================================================

Doom 3 BFG Edition GPL Source Code
Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company.

This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").

Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Doom 3 BFG Edition Source Code.  If not, see <http://www.gnu.org/licenses/>.

In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code.  If not, please request a copy in writing from id Software at the address below.

If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.

===========================================================================
*/

#ifndef __LEXER_H__
#define __LEXER_H__

/*
===============================================================================

	Lexicographical parser

	Does not use memory allocation during parsing. The lexer uses no
	memory allocation if a source is loaded with LoadMemory().
	However, idToken may still allocate memory for large strings.

	A number directly following the escape character '\' in a string is
	assumed to be in decimal format instead of octal. Binary numbers of
	the form 0b.. or 0B.. can also be used.

===============================================================================
*/

// lexer flags
// Each flag is defined through BIT() with a distinct index.
// NOTE(review): presumably the flags are meant to be OR-ed together into the
// int accepted by the idLexer constructors and idLexer::SetFlags() -- confirm
// against the BIT() definition.
typedef enum
{
	LEXFL_NOERRORS						= BIT( 0 ),	// don't print any errors
	LEXFL_NOWARNINGS					= BIT( 1 ),	// don't print any warnings
	LEXFL_NOFATALERRORS					= BIT( 2 ),	// errors aren't fatal
	LEXFL_NOSTRINGCONCAT				= BIT( 3 ),	// multiple strings separated by whitespaces are not concatenated
	LEXFL_NOSTRINGESCAPECHARS			= BIT( 4 ),	// no escape characters inside strings
	LEXFL_NODOLLARPRECOMPILE			= BIT( 5 ),	// don't use the $ sign for precompilation
	LEXFL_NOBASEINCLUDES				= BIT( 6 ),	// don't include files embraced with < >
	LEXFL_ALLOWPATHNAMES				= BIT( 7 ),	// allow path separators in names
	LEXFL_ALLOWNUMBERNAMES				= BIT( 8 ),	// allow names to start with a number
	LEXFL_ALLOWIPADDRESSES				= BIT( 9 ),	// allow ip addresses to be parsed as numbers
	LEXFL_ALLOWFLOATEXCEPTIONS			= BIT( 10 ),	// allow float exceptions like 1.#INF or 1.#IND to be parsed
	LEXFL_ALLOWMULTICHARLITERALS		= BIT( 11 ),	// allow multi character literals
	LEXFL_ALLOWBACKSLASHSTRINGCONCAT	= BIT( 12 ),	// allow multiple strings separated by '\' to be concatenated
	LEXFL_ONLYSTRINGS					= BIT( 13 )		// parse as whitespace delimited strings (quoted strings keep quotes)
} lexerFlags_t;

// punctuation ids
// Numeric ids stored in punctuation_t::n. The string that belongs to each id
// comes from the punctuation table in use (see idLexer::SetPunctuations).
// NOTE(review): the names suggest the C/C++ operators they stand for; confirm
// the exact strings against default_punctuations.
#define P_RSHIFT_ASSIGN				1
#define P_LSHIFT_ASSIGN				2
#define P_PARMS						3
#define P_PRECOMPMERGE				4

#define P_LOGIC_AND					5
#define P_LOGIC_OR					6
#define P_LOGIC_GEQ					7
#define P_LOGIC_LEQ					8
#define P_LOGIC_EQ					9
#define P_LOGIC_UNEQ				10

#define P_MUL_ASSIGN				11
#define P_DIV_ASSIGN				12
#define P_MOD_ASSIGN				13
#define P_ADD_ASSIGN				14
#define P_SUB_ASSIGN				15
#define P_INC						16
#define P_DEC						17

#define P_BIN_AND_ASSIGN			18
#define P_BIN_OR_ASSIGN				19
#define P_BIN_XOR_ASSIGN			20
#define P_RSHIFT					21
#define P_LSHIFT					22

#define P_POINTERREF				23
#define P_CPP1						24
#define P_CPP2						25
#define P_MUL						26
#define P_DIV						27
#define P_MOD						28
#define P_ADD						29
#define P_SUB						30
#define P_ASSIGN					31

#define P_BIN_AND					32
#define P_BIN_OR 33 #define P_BIN_XOR 34 #define P_BIN_NOT 35 #define P_LOGIC_NOT 36 #define P_LOGIC_GREATER 37 #define P_LOGIC_LESS 38 #define P_REF 39 #define P_COMMA 40 #define P_SEMICOLON 41 #define P_COLON 42 #define P_QUESTIONMARK 43 #define P_PARENTHESESOPEN 44 #define P_PARENTHESESCLOSE 45 #define P_BRACEOPEN 46 #define P_BRACECLOSE 47 #define P_SQBRACKETOPEN 48 #define P_SQBRACKETCLOSE 49 #define P_BACKSLASH 50 #define P_PRECOMP 51 #define P_DOLLAR 52 // punctuation typedef struct punctuation_s { char* p; // punctuation character(s) int n; // punctuation id } punctuation_t; class idLexer { friend class idParser; public: // constructor idLexer(); idLexer( int flags ); idLexer( const char* filename, int flags = 0, bool OSPath = false ); idLexer( const char* ptr, int length, const char* name, int flags = 0 ); // destructor ~idLexer(); // load a script from the given file at the given offset with the given length int LoadFile( const char* filename, bool OSPath = false ); // load a script from the given memory with the given length and a specified line offset, // so source strings extracted from a file can still refer to proper line numbers in the file // NOTE: the ptr is expected to point at a valid C string: ptr[length] == '\0' int LoadMemory( const char* ptr, int length, const char* name, int startLine = 1 ); // free the script void FreeSource(); // returns true if a script is loaded int IsLoaded() { return idLexer::loaded; }; // read a token int ReadToken( idToken* token ); // expect a certain token, reads the token when available int ExpectTokenString( const char* string ); // expect a certain token type int ExpectTokenType( int type, int subtype, idToken* token ); // expect a token int ExpectAnyToken( idToken* token ); // returns true when the token is available int CheckTokenString( const char* string ); // returns true an reads the token when a token with the given type is available int CheckTokenType( int type, int subtype, idToken* token ); // returns 
true if the next token equals the given string but does not remove the token from the source int PeekTokenString( const char* string ); // returns true if the next token equals the given type but does not remove the token from the source int PeekTokenType( int type, int subtype, idToken* token ); // skip tokens until the given token string is read int SkipUntilString( const char* string ); // skip the rest of the current line int SkipRestOfLine(); // skip the braced section int SkipBracedSection( bool parseFirstBrace = true ); // skips spaces, tabs, C-like comments etc. Returns false if there is no token left to read. bool SkipWhiteSpace( bool currentLine ); // unread the given token void UnreadToken( const idToken* token ); // read a token only if on the same line int ReadTokenOnLine( idToken* token ); //Returns the rest of the current line const char* ReadRestOfLine( idStr& out ); // read a signed integer int ParseInt(); // read a boolean bool ParseBool(); // read a floating point number. If errorFlag is NULL, a non-numeric token will // issue an Error(). 
If it isn't NULL, it will issue a Warning() and set *errorFlag = true float ParseFloat( bool* errorFlag = NULL ); // parse matrices with floats int Parse1DMatrix( int x, float* m ); int Parse2DMatrix( int y, int x, float* m ); int Parse3DMatrix( int z, int y, int x, float* m ); // parse a braced section into a string const char* ParseBracedSection( idStr& out ); // parse a braced section into a string, maintaining indents and newlines const char* ParseBracedSectionExact( idStr& out, int tabs = -1 ); // parse the rest of the line const char* ParseRestOfLine( idStr& out ); // pulls the entire line, including the \n at the end const char* ParseCompleteLine( idStr& out ); // retrieves the white space characters before the last read token int GetLastWhiteSpace( idStr& whiteSpace ) const; // returns start index into text buffer of last white space int GetLastWhiteSpaceStart() const; // returns end index into text buffer of last white space int GetLastWhiteSpaceEnd() const; // set an array with punctuations, NULL restores default C/C++ set, see default_punctuations for an example void SetPunctuations( const punctuation_t* p ); // returns a pointer to the punctuation with the given id const char* GetPunctuationFromId( int id ); // get the id for the given punctuation int GetPunctuationId( const char* p ); // set lexer flags void SetFlags( int flags ); // get lexer flags int GetFlags(); // reset the lexer void Reset(); // returns true if at the end of the file bool EndOfFile(); // returns the current filename const char* GetFileName(); // get offset in script const int GetFileOffset(); // get file time const ID_TIME_T GetFileTime(); // returns the current line number const int GetLineNum(); // print an error message void Error( VERIFY_FORMAT_STRING const char* str, ... ); // print a warning message void Warning( VERIFY_FORMAT_STRING const char* str, ... 
); // returns true if Error() was called with LEXFL_NOFATALERRORS or LEXFL_NOERRORS set bool HadError() const; // set the base folder to load files from static void SetBaseFolder( const char* path ); private: int loaded; // set when a script file is loaded from file or memory idStr filename; // file name of the script int allocated; // true if buffer memory was allocated const char* buffer; // buffer containing the script const char* script_p; // current pointer in the script const char* end_p; // pointer to the end of the script const char* lastScript_p; // script pointer before reading token const char* whiteSpaceStart_p; // start of last white space const char* whiteSpaceEnd_p; // end of last white space ID_TIME_T fileTime; // file time int length; // length of the script in bytes int line; // current line in script int lastline; // line before reading token int tokenavailable; // set by unreadToken int flags; // several script flags const punctuation_t* punctuations; // the punctuations used in the script int* punctuationtable; // ASCII table with punctuations int* nextpunctuation; // next punctuation in chain idToken token; // available token idLexer* next; // next script in a chain bool hadError; // set by idLexer::Error, even if the error is supressed static char baseFolder[ 256 ]; // base folder to load files from private: void CreatePunctuationTable( const punctuation_t* punctuations ); int ReadWhiteSpace(); int ReadEscapeCharacter( char* ch ); int ReadString( idToken* token, int quote ); int ReadName( idToken* token ); int ReadNumber( idToken* token ); int ReadPunctuation( idToken* token ); int ReadPrimitive( idToken* token ); int CheckString( const char* str ) const; int NumLinesCrossed(); }; ID_INLINE const char* idLexer::GetFileName() { return idLexer::filename; } ID_INLINE const int idLexer::GetFileOffset() { return idLexer::script_p - idLexer::buffer; } ID_INLINE const ID_TIME_T idLexer::GetFileTime() { return idLexer::fileTime; } ID_INLINE const int 
idLexer::GetLineNum() { return idLexer::line; } ID_INLINE void idLexer::SetFlags( int flags ) { idLexer::flags = flags; } ID_INLINE int idLexer::GetFlags() { return idLexer::flags; } #endif /* !__LEXER_H__ */