// Copyright (C) 2007 Id Software, Inc. // #ifndef __LEXER_H__ #define __LEXER_H__ /* =============================================================================== Lexicographical parser Does not use memory allocation during parsing. The lexer uses no memory allocation if a source is loaded with LoadMemory(). However, idToken may still allocate memory for large strings. A number directly following the escape character '\' in a string is assumed to be in decimal format instead of octal. Binary numbers of the form 0b.. or 0B.. can also be used. =============================================================================== */ // lexer flags enum lexerFlags_t { LEXFL_NOERRORS = BIT(0), // don't print any errors LEXFL_NOWARNINGS = BIT(1), // don't print any warnings LEXFL_NOFATALERRORS = BIT(2), // errors aren't fatal LEXFL_VCSTYLEREPORTS = BIT(3), // warnings and errors are reported in M$ VC style LEXFL_NOSTRINGCONCAT = BIT(4), // multiple strings separated by whitespaces are not concatenated LEXFL_NOSTRINGESCAPECHARS = BIT(5), // no escape characters inside strings LEXFL_NODOLLARPRECOMPILE = BIT(6), // don't use the $ sign for precompilation LEXFL_NOBASEINCLUDES = BIT(7), // don't include files embraced with < > LEXFL_ALLOWPATHNAMES = BIT(8), // allow path separators in names LEXFL_ALLOWNUMBERNAMES = BIT(9), // allow names to start with a number LEXFL_ALLOWIPADDRESSES = BIT(10), // allow ip addresses to be parsed as numbers LEXFL_ALLOWFLOATEXCEPTIONS = BIT(11), // allow float exceptions like 1.#INF or 1.#IND to be parsed LEXFL_ALLOWMULTICHARLITERALS = BIT(12), // allow multi character literals LEXFL_ALLOWBACKSLASHSTRINGCONCAT = BIT(13), // allow multiple strings seperated by '\' to be concatenated LEXFL_ONLYSTRINGS = BIT(14), // parse as whitespace deliminated strings (quoted strings keep quotes) LEXFL_NOEMITSTRINGESCAPECHARS = BIT(15), // no escape characters inside strings LEXFL_ALLOWRAWSTRINGBLOCKS = BIT(16), // allow raw text blocks embraced with <% %> }; // punctuation ids #define P_RSHIFT_ASSIGN 1 #define P_LSHIFT_ASSIGN 2 #define P_PARMS 3 #define P_PRECOMPMERGE 4 #define P_LOGIC_AND 5 #define P_LOGIC_OR 6 #define P_LOGIC_GEQ 7 #define P_LOGIC_LEQ 8 #define P_LOGIC_EQ 9 #define P_LOGIC_UNEQ 10 #define P_MUL_ASSIGN 11 #define P_DIV_ASSIGN 12 #define P_MOD_ASSIGN 13 #define P_ADD_ASSIGN 14 #define P_SUB_ASSIGN 15 #define P_INC 16 #define P_DEC 17 #define P_BIN_AND_ASSIGN 18 #define P_BIN_OR_ASSIGN 19 #define P_BIN_XOR_ASSIGN 20 #define P_RSHIFT 21 #define P_LSHIFT 22 #define P_SCOPE_RESOLUTION 23 #define P_MEMBER_SELECTION_OBJECT 24 #define P_MEMBER_SELECTION_POINTER 25 #define P_POINTER_TO_MEMBER_OBJECT 26 #define P_POINTER_TO_MEMBER_POINTER 27 #define P_MUL 28 #define P_DIV 29 #define P_MOD 30 #define P_ADD 31 #define P_SUB 32 #define P_ASSIGN 33 #define P_BIN_AND 34 #define P_BIN_OR 35 #define P_BIN_XOR 36 #define P_BIN_NOT 37 #define P_LOGIC_NOT 38 #define P_LOGIC_GREATER 39 #define P_LOGIC_LESS 40 #define P_COMMA 41 #define P_SEMICOLON 42 #define P_COLON 43 #define P_QUESTIONMARK 44 #define P_PARENTHESESOPEN 45 #define P_PARENTHESESCLOSE 46 #define P_BRACEOPEN 47 #define P_BRACECLOSE 48 #define P_SQBRACKETOPEN 49 #define P_SQBRACKETCLOSE 50 #define P_BACKSLASH 51 #define P_PRECOMP 52 #define P_DOLLAR 53 // punctuation struct punctuation_t { char *p; // punctuation character(s) int n; // punctuation id }; class idLexer { friend class idParser; public: // constructor idLexer(); idLexer( int flags ); idLexer( const char *filename, int flags = 0, bool OSPath = false, int startLine = 1 ); idLexer( const char *ptr, int length, const char *name, int flags = 0, int startLine = 1 ); // destructor ~idLexer(); // load a script from the given file at the given offset with the given length bool LoadFile( const char *filename, bool OSPath = false, int startLine = 1 ); // load a script from the given memory with the given length and a specified line offset, // so source strings extracted from a file can still refer to proper line numbers in the file // NOTE: the ptr is expected to point at a valid C string: ptr[length] == '\0' bool LoadMemory( const char *ptr, int length, const char *name, int startLine = 1 ); // Load a binary token table and indices as a token stream bool LoadMemoryBinary( const byte* ptr, int length, const char *name, idTokenCache* globals = NULL ); bool LoadTokenStream( const idList& indices, const idTokenCache& tokens, const char* name ); idLexerBinary& GetBinary() ; const idLexerBinary& GetBinary() const; // free the script void FreeSource( void ); // returns true if a script is loaded bool IsLoaded( void ) const { return loaded; } // read a token int ReadToken( idToken *token ); // expect a certain token, reads the token when available bool ExpectTokenString( const char *string ); // expect a certain token type bool ExpectTokenType( int type, int subtype, idToken *token ); // expect a token bool ExpectAnyToken( idToken *token ); // returns true when the token is available bool CheckTokenString( const char *string ); // returns true an reads the token when a token with the given type is available bool CheckTokenType( int type, int subtype, idToken *token ); // returns true if the next token equals the given string but does not remove the token from the source int PeekTokenString( const char *string ); // returns true if the next token equals the given type but does not remove the token from the source int PeekTokenType( int type, int subtype, idToken *token ); // skip tokens until the given token string is read bool SkipUntilString( const char *string, idToken* token = NULL ); // skip the rest of the current line int SkipRestOfLine( void ); // skip the braced section int SkipBracedSection( bool parseFirstBrace = true ); // skip the braced section, maintaining indents and newlines bool SkipBracedSectionExact( int tabs = -1, bool parseFirstBrace = true ); // skips spaces, tabs, C-like comments etc. int SkipWhiteSpace( bool currentLine ); // unread the given token void UnreadToken( const idToken *token ); // read a token only if on the same line int ReadTokenOnLine( idToken *token ); // read a signed integer int ParseInt( void ); // read a boolean bool ParseBool( void ); // read a floating point number. If errorFlag is NULL, a non-numeric token will // issue an Error(). If it isn't NULL, it will issue a Warning() and set *errorFlag = true float ParseFloat( bool *errorFlag = NULL ); // parse matrices with floats int Parse1DMatrix( int x, float *m, bool expectCommas = false ); int Parse2DMatrix( int y, int x, float *m ); int Parse3DMatrix( int z, int y, int x, float *m ); // parse a braced section into a string const char * ParseBracedSection( idStr &out, int tabs = -1, bool parseFirstBrace = true, char intro = '{', char outro = '}' ); // parse a braced section into a string, maintaining indents and newlines bool ParseBracedSectionExact( idStr &out, int tabs = -1, bool parseFirstBrace = true ); // parse the rest of the line const char * ParseRestOfLine( idStr &out ); // pulls the entire line, including the \n at the end const char * ParseCompleteLine( idStr &out ); // retrieves the white space after the last read token int GetNextWhiteSpace( idStr &whiteSpace, bool currentLine ); // retrieves the white space characters before the last read token int GetLastWhiteSpace( idStr &whiteSpace ) const; // returns start index into text buffer of last white space int GetLastWhiteSpaceStart( void ) const; // returns end index into text buffer of last white space int GetLastWhiteSpaceEnd( void ) const; // set an array with punctuations, NULL restores default C/C++ set, see default_punctuations for an example void SetPunctuations( const punctuation_t *p ); // returns a pointer to the punctuation with the given id const char * GetPunctuationFromId( int id ) const; // get the id for the given punctuation int GetPunctuationId( const char *p ) const; // set lexer flags void SetFlags( int flags ); // get lexer flags int GetFlags( void ) const; // reset the lexer void Reset( void ); // returns true if at the end of the file int EndOfFile( void ) const; // returns the current filename const char * GetFileName( void ) const; // get offset in script int GetFileOffset( void ) const; // get offset in script int GetLastFileOffset( void ) const; // get total size of script int GetFileSize( void ) const; // get file time unsigned int GetFileTime( void ) const; // returns the current line number int GetLineNum( void ) const; // print an error message void Error( const char *str, ... ); // print a warning message void Warning( const char *str, ... ); // returns true if Error() was called with LEXFL_NOFATALERRORS or LEXFL_NOERRORS set bool HadError( void ) const; // returns true if any warnings were printed bool HadWarning( void ) const; // set the base folder to load files from static void SetBaseFolder( const char *path ); private: idLexer( const idLexer& rhs ); private: bool loaded; // set when a script file is loaded from file or memory idStr filename; // file name of the script int allocated; // true if buffer memory was allocated const char * buffer; // buffer containing the script const char * script_p; // current pointer in the script const char * end_p; // pointer to the end of the script const char * lastScript_p; // script pointer before reading token const char * whiteSpaceStart_p; // start of last white space const char * whiteSpaceEnd_p; // end of last white space unsigned int fileTime; // file time int length; // length of the script in bytes int line; // current line in script int lastline; // line before reading token int flags; // several script flags const punctuation_t *punctuations; // the punctuations used in the script int * punctuationtable; // ASCII table with punctuations int * nextpunctuation; // next punctuation in chain idList< idToken > tokens; // available token idLexer * next; // next script in a chain bool hadError; // set by idLexer::Error, even if the error is suppressed bool hadWarning; // set by idLexer::Warning, even if the warning is suppressed idLexerBinary binary; static char baseFolder[ 256 ]; // base folder to load files from private: void CreatePunctuationTable( const punctuation_t *punctuations ); int ReadWhiteSpace( void ); int ReadEscapeCharacter( char *ch ); int ReadString( idToken *token, int quote ); int ReadName( idToken *token ); int ReadNumber( idToken *token ); int ReadRawStringBlock( idToken *token ); int ReadPunctuation( idToken *token ); int CheckString( const char *str ) const; int NumLinesCrossed( void ) const; }; ID_INLINE const char *idLexer::GetFileName( void ) const { return this->filename; } ID_INLINE int idLexer::GetFileOffset( void ) const { return this->script_p - this->buffer; } ID_INLINE int idLexer::GetLastFileOffset( void ) const { return this->lastScript_p - this->buffer; } ID_INLINE int idLexer::GetFileSize( void ) const { return this->end_p - this->buffer; } ID_INLINE unsigned int idLexer::GetFileTime( void ) const { return this->fileTime; } ID_INLINE int idLexer::GetLineNum( void ) const { return this->line; } ID_INLINE void idLexer::SetFlags( int flags ) { assert( !loaded ); // all flags must be set before loading the file this->flags = flags; } ID_INLINE int idLexer::GetFlags( void ) const { return this->flags; } ID_INLINE idLexerBinary& idLexer::GetBinary() { return binary; } ID_INLINE const idLexerBinary& idLexer::GetBinary() const { return binary; } #endif /* !__LEXER_H__ */