// quake2-action/acesrc/botscan.c

/*
 * $Header: /LicenseToKill/src/acesrc/botscan.c 1     16/10/99 8:41 Riever $
 *
 * $Log: /LicenseToKill/src/acesrc/botscan.c $
 *
 * 1     16/10/99 8:41 Riever
 * Initial import to LTK
 *
 * 4     15/10/99 8:40 Riever
 * Added carriage return '\r' to whitespace definition.
 *
 * 3     15/10/99 8:11 Riever
 * Forgot to step over last quote in STRLIT - ok now.
 *
 * 2     15/10/99 7:01 Riever
 * Fixed a bug in INTLIT parsing
 *
 * 1     14/10/99 8:26 Riever
 *
 * 4     14/10/99 7:21 Riever
 * Defined all other operators and symbols that will be used.
 * Left out the 'e' notation for REAL numbers since we don't use that.
 *
 * 3     14/10/99 7:02 Riever
 * Changed filename to "botscan.c" to make it clear where it belongs in
 * this project.
 * Added a test parser function in comment block at end of file.
 *
 * 2     14/10/99 6:50 Riever
 * First version - just a compilation check.
 *
 */
//=================================================================
// botscan.c
//
// Connor Caple 14th October 1999
//
// A lexical scanner module to allow much more complex configuration files
// Original code idea : Lee & Mark Atkinson, "Using C", pub. Que
//
// Sample driver included at bottom of file.
//
//=================================================================

#include	"botscan.h"
#include	<stdlib.h>
#include	<stdio.h>

//==============================
// nmtoken
//==============================
// Returns a printable name for a token type as reported by scanner().
// This is only used in testing and is very useful when debugging new
// configuration files.
//
// I may write a stand alone config file tester to ensure that
// people use the instructions correctly.
//
// ttype - token type value. It is used directly as an index into the
//         name table below, so the order of the table has to match the
//         numbering of the token type constants (LEXERR first, UNDEF
//         last). NOTE(review): the constants are declared outside this
//         file - confirm the numbering against that declaration.
//
// Returns a pointer to a static string; the caller must not modify or
// free it. A value that falls outside the table yields "UNDEF" instead
// of reading past the ends of the array.
//
char	*nmtoken( int ttype)
{
	static char	*tokenNames[] = {
		"LEXERR",
		"SYMBOL",
		"INTLIT",
		"REALLIT",
		"STRLIT",
		"LPAREN",
		"RPAREN",
		"SEMIC",
		"COLON",
		"COMMA",
		"PERIOD",
		"APOST",
		"PLUSOP",
		"MINUSOP",
		"MUXOP",
		"DIVOP",
		"POWOP",
		"ASSIGNOP",
		"HASH",
		"BANG",
		"EOL",
		"UNDEF"
	};
	int		count = (int)( sizeof(tokenNames) / sizeof(tokenNames[0]) );

	// Guard the lookup: an unknown type must not index outside the table.
	// The last entry ("UNDEF") doubles as the name for anything unknown.
	if( ttype < 0 || ttype >= count )
	{
		return( tokenNames[count - 1] );
	}

	return( tokenNames[ttype] );
}

//=================================
// scanner
//=================================
// This is the actual scanner. It skips leading whitespace, recognises the
// next token in a NUL-terminated string and returns its text together
// with an integer token type.
//
// text  - address of the read pointer into the string being worked on.
//         The pointer is advanced past whatever was consumed.
// token - caller supplied buffer that receives the token text as a
//         NUL-terminated string. No buffer length is passed in, so the
//         caller has to provide room for the longest token the input
//         can contain (plus the terminator).
// ttype - receives the token type (EOL, SYMBOL, STRLIT, INTLIT, REALLIT,
//         one of the punctuation / operator types, or LEXERR).
//
// Token rules:
//   SYMBOL   a letter followed by any run of letters and digits
//   STRLIT   text between double quotes; the quotes are not copied.
//            A missing closing quote ends the literal at end of string.
//   INTLIT   a run of digits
//   REALLIT  as INTLIT, but a '.' was found directly after a digit
//   others   a single punctuation / operator character
//
// On EOL and LEXERR the token is returned as an empty string. On LEXERR
// the read pointer is left on the character that could not be matched,
// so the caller must stop (or step over it) rather than call again.
//

void	scanner( char **text, char *token, int *ttype)
{
	char	c;

	// Start from an empty token so that the EOL and LEXERR paths, which
	// copy nothing, never hand back whatever was in the buffer before.
	*token = '\0';

	// Skip all whitespace
	while( **text == ' ' || **text == '\t' || **text == '\n' || **text == '\r' )
	{
		(*text)++;
	}

	// First character of the token decides which rule applies
	c = **text;

	// If the string terminates return EOL
	if( c == '\0' )
	{
		*ttype = EOL;
		return;
	}

	// SYMBOLS
	if( (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') )
	{
		*ttype = SYMBOL;

		while(
			(**text >= 'A' && **text <= 'Z') || (**text >= 'a' && **text <= 'z')
			|| (**text >= '0' && **text <= '9')
			)
		{
			*token++ = *(*text)++;
		}
		*token = '\0';	// Terminate the string.
		return;
	}

	// STRING LITERALS
	if( c == '"' )
	{
		*ttype = STRLIT;
		(*text)++;	// Skip first quote.
		while( **text != '"' && **text != '\0' )
		{
			*token++ = *(*text)++;
		}
		// Only step over the closing quote if there is one. If the loop
		// stopped at the end of the string the pointer has to stay on the
		// terminator, otherwise the next call would scan past the buffer.
		if( **text == '"' )
		{
			(*text)++;
		}
		*token = '\0';	// Terminate the string.
		return;
	}

	// NUMERICS
	if( c >= '0' && c <= '9' )
	{
		*ttype = INTLIT;
		while( **text >= '0' && **text <= '9' )
		{
			*token++ = *(*text)++;
			// A '.' directly after a digit is taken as part of the number
			// and turns it into a real literal.
			if( **text == '.' )
			{
				*ttype = REALLIT;
				*token++ = *(*text)++;
			}
			// I left out the 'e' notation part - we don't need it.
		}
		*token = '\0';	// Terminate the string.
		return;
	}

	// PUNCTUATION SECTION
	// Every entry is a single character token; pick the type here and
	// copy the character once below.
	switch( c )
	{
	case '(':	*ttype = LPAREN;	break;
	case ')':	*ttype = RPAREN;	break;
	case ';':	*ttype = SEMIC;		break;
	case ':':	*ttype = COLON;		break;
	case ',':	*ttype = COMMA;		break;
	case '.':	*ttype = PERIOD;	break;
	case '\'':	*ttype = APOST;		break;
	case '+':	*ttype = PLUSOP;	break;
	case '-':	*ttype = MINUSOP;	break;
	case '*':	*ttype = MUXOP;		break;
	case '/':	*ttype = DIVOP;		break;
	case '^':	*ttype = POWOP;		break;
	case '=':	*ttype = ASSIGNOP;	break;
	case '#':	*ttype = HASH;		break;
	case '!':	*ttype = BANG;		break;
	default:
		// Nothing matched - it must be an error.
		// The offending character is deliberately not consumed.
		*ttype = LEXERR;
		return;
	}

	*token++ = *(*text)++;
	*token = '\0';	// Terminate the string.
	return;
}

/*
//*************************************************************************
// Sample driver to test this file with
//

void	parseString( char *test )
{
	char	*sp, *tp;
	int		ttype;
	char	token[81];

	sp = test;	// Set up a pointer to the string;
	ttype = UNDEF;	// Signal "no match found yet"

	// Now scan the string and report each token type and value found
	while( ttype != EOL && ttype != LEXERR )
	{
		// Set tp to point to our return string location
		tp = token;
		// Pass in the correct values to scanner()
		scanner( &sp, tp, &ttype );
		// If we are not done, print what we found.
		if( ttype != EOL )
		{
			printf( "Token type = %s, token = %s\n", nmtoken(ttype), token );
		}
	}
}
//**************************************************************************
*/