#if !defined( INCLUDED_SCRIPT_SCRIPTTOKENISER_H ) #define INCLUDED_SCRIPT_SCRIPTTOKENISER_H #include "iscriplib.h" class ScriptTokeniser : public Tokeniser { enum CharType { eWhitespace, eCharToken, eNewline, eCharQuote, eCharSolidus, eCharStar, eCharSpecial, }; typedef bool ( ScriptTokeniser::*Tokenise )( char c ); Tokenise m_stack[3]; Tokenise* m_state; SingleCharacterInputStream m_istream; std::size_t m_scriptline; char m_token[MAXTOKEN]; char* m_write; char m_current; bool m_eof; bool m_crossline; bool m_unget; bool m_special; CharType charType( const char c ){ switch ( c ) { case '\n': return eNewline; case '"': return eCharQuote; case '/': return eCharSolidus; case '*': return eCharStar; case '{': case '(': case '}': case ')': case '[': case ']': case ',': case ':': return ( m_special ) ? eCharSpecial : eCharToken; } if ( c > 32 ) { return eCharToken; } return eWhitespace; } Tokenise state(){ return *m_state; } void push( Tokenise state ){ ASSERT_MESSAGE( m_state != m_stack + 2, "token parser: illegal stack push" ); *( ++m_state ) = state; } void pop(){ ASSERT_MESSAGE( m_state != m_stack, "token parser: illegal stack pop" ); --m_state; } void add( const char c ){ if ( m_write < m_token + MAXTOKEN - 1 ) { *m_write++ = c; } } void remove(){ ASSERT_MESSAGE( m_write > m_token, "no char to remove" ); --m_write; } bool tokeniseDefault( char c ){ switch ( charType( c ) ) { case eNewline: ASSERT_MESSAGE( m_crossline, "unexpected end-of-line before token" ); break; case eCharToken: case eCharStar: push( Tokenise( &ScriptTokeniser::tokeniseToken ) ); add( c ); break; case eCharSpecial: push( Tokenise( &ScriptTokeniser::tokeniseSpecial ) ); add( c ); break; case eCharQuote: push( Tokenise( &ScriptTokeniser::tokeniseQuotedToken ) ); break; case eCharSolidus: push( Tokenise( &ScriptTokeniser::tokeniseSolidus ) ); break; default: break; } return false; } bool tokeniseToken( char c ){ switch ( charType( c ) ) { case eNewline: case eWhitespace: case eCharQuote: case eCharSpecial: pop(); return true; // emit token case eCharSolidus: push( Tokenise( &ScriptTokeniser::tokeniseSolidus ) ); break; case eCharToken: case eCharStar: add( c ); break; default: break; } return false; } bool tokeniseQuotedToken( char c ){ switch ( charType( c ) ) { case eNewline: ASSERT_MESSAGE( !m_crossline, "unexpected end-of-line in quoted token" ); break; case eWhitespace: case eCharToken: case eCharSolidus: case eCharStar: case eCharSpecial: add( c ); break; case eCharQuote: pop(); push( Tokenise( &ScriptTokeniser::tokeniseEndQuote ) ); break; default: break; } return false; } bool tokeniseSolidus( char c ){ switch ( charType( c ) ) { case eNewline: case eWhitespace: case eCharQuote: case eCharSpecial: pop(); add( '/' ); return true; // emit single slash case eCharToken: pop(); add( '/' ); add( c ); break; case eCharSolidus: pop(); push( Tokenise( &ScriptTokeniser::tokeniseComment ) ); break; // dont emit single slash case eCharStar: pop(); push( Tokenise( &ScriptTokeniser::tokeniseBlockComment ) ); break; // dont emit single slash default: break; } return false; } bool tokeniseComment( char c ){ if ( c == '\n' ) { pop(); if ( state() == Tokenise( &ScriptTokeniser::tokeniseToken ) ) { pop(); return true; // emit token immediatly preceding comment } } return false; } bool tokeniseBlockComment( char c ){ if ( c == '*' ) { pop(); push( Tokenise( &ScriptTokeniser::tokeniseEndBlockComment ) ); } return false; } bool tokeniseEndBlockComment( char c ){ switch ( c ) { case '/': pop(); if ( state() == Tokenise( &ScriptTokeniser::tokeniseToken ) ) { pop(); return true; // emit token immediatly preceding comment } break; // dont emit comment case '*': break; // no state change default: pop(); push( Tokenise( &ScriptTokeniser::tokeniseBlockComment ) ); break; } return false; } bool tokeniseEndQuote( char c ){ pop(); return true; // emit quoted token } bool tokeniseSpecial( char c ){ pop(); return true; // emit single-character token } void tokenise(){ m_write = m_token; while ( !eof() ) { char c = m_current; if ( c == '\n' ) { ++m_scriptline; } if ( ( ( *this ).*state() )( c ) ) { return; } m_eof = !m_istream.readChar( m_current ); } } const char* fillToken(){ if ( eof() ) { return 0; } tokenise(); if ( eof() && m_write == m_token ) { return 0; } add( '\0' ); return m_token; } bool eof(){ return m_eof; } public: ScriptTokeniser( TextInputStream& istream, bool special ) : m_state( m_stack ), m_istream( istream ), m_scriptline( 0 ), m_crossline( false ), m_unget( false ), m_special( special ){ m_stack[0] = Tokenise( &ScriptTokeniser::tokeniseDefault ); m_eof = !m_istream.readChar( m_current ); m_token[MAXTOKEN - 1] = '\0'; } void release(){ delete this; } void nextLine(){ m_crossline = true; } const char* getToken(){ if ( m_unget ) { m_unget = false; return m_token; } return fillToken(); } void ungetToken(){ ASSERT_MESSAGE( !m_unget, "can't unget more than one token" ); m_unget = true; } std::size_t getLine() const { return m_scriptline; } }; inline Tokeniser& NewScriptTokeniser( TextInputStream& istream ){ return *( new ScriptTokeniser( istream, true ) ); } inline Tokeniser& NewSimpleTokeniser( TextInputStream& istream ){ return *( new ScriptTokeniser( istream, false ) ); } #endif