316 lines
6.1 KiB
C
316 lines
6.1 KiB
C
/*
|
|
* $Header: /LicenseToKill/src/acesrc/botscan.c 1 16/10/99 8:41 Riever $
|
|
*
|
|
* $Log: /LicenseToKill/src/acesrc/botscan.c $
|
|
*
|
|
* 1 16/10/99 8:41 Riever
|
|
* Initial import to LTK
|
|
*
|
|
* 4 15/10/99 8:40 Riever
|
|
* Added carriage return '\r' to whitespace definition.
|
|
*
|
|
* 3 15/10/99 8:11 Riever
|
|
* Forgot to step over last quote in STRLIT - ok now.
|
|
*
|
|
* 2 15/10/99 7:01 Riever
|
|
* Fixed a bug in INTLIT parsing
|
|
*
|
|
* 1 14/10/99 8:26 Riever
|
|
*
|
|
* 4 14/10/99 7:21 Riever
|
|
* Defined all other operators and symbols that will be used.
|
|
* Left out the 'e' notation for REAL numbers since we don't use that.
|
|
*
|
|
* 3 14/10/99 7:02 Riever
|
|
* Changed filename to "botscan.c" to make it clear where it belongs in
|
|
* this project.
|
|
* Added a test parser function in comment block at end of file.
|
|
*
|
|
* 2 14/10/99 6:50 Riever
|
|
* First version - just a compilation check.
|
|
*
|
|
*/
|
|
//=================================================================
|
|
// botscan.c
|
|
//
|
|
// Connor Caple 14th October 1999
|
|
//
|
|
// A lexical scanner module to allow much more complex configuration files
|
|
// Original code idea : Lee & Mark Atkinson, "Using C", pub. Que
|
|
//
|
|
// Sample driver included at bottom of file.
|
|
//
|
|
//=================================================================
|
|
|
|
#include "botscan.h"
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
//==============================
// nmtoken
//==============================
// This is only used in testing. It returns a string that matches
// the type of token returned in the scanner.
// Very useful when debugging new configuration files.
//
// I may write a stand alone config file tester to ensure that
// people use the instructions correctly.
//
// ttype - token type code. It is used directly as an index into the
//         name table below, so the table order has to match the
//         numeric token type values (presumably declared in
//         botscan.h - confirm whenever a token type is added).
//
// Returns a pointer to a static string; the caller must not modify
// or free it. A ttype that falls outside the table (negative or too
// large) yields "UNDEF" rather than reading past the array.
//
char *nmtoken( int ttype )
{
	static char *tokenNames[] = {
		"LEXERR",
		"SYMBOL",
		"INTLIT",
		"REALLIT",
		"STRLIT",
		"LPAREN",
		"RPAREN",
		"SEMIC",
		"COLON",
		"COMMA",
		"PERIOD",
		"APOST",
		"PLUSOP",
		"MINUSOP",
		"MUXOP",
		"DIVOP",
		"POWOP",
		"ASSIGNOP",
		"HASH",
		"BANG",
		"EOL",
		"UNDEF"
	};
	const int tokenCount = (int)( sizeof( tokenNames ) / sizeof( tokenNames[0] ) );

	// Guard the lookup: an unknown type code maps to the last
	// entry ("UNDEF") instead of indexing outside the table.
	if( ttype < 0 || ttype >= tokenCount )
	{
		return( tokenNames[tokenCount - 1] );
	}

	return( tokenNames[ttype] );
}
|
|
|
|
//=================================
|
|
// scanner
|
|
//=================================
|
|
// This is the actual scanner. It searches for tokens it can
|
|
// recognise and returns them in a string with the
|
|
// tokentype defined in an integer.
|
|
//
|
|
// text is the address of the string being worked on
|
|
// token is the returned value of the token
|
|
// ttype is the integer value of the token type
|
|
//
|
|
|
|
void scanner( char **text, char *token, int *ttype)
|
|
{
|
|
// Skip all whitespace
|
|
for ( ; **text == ' ' || **text == '\t' || **text == '\n' || **text == '\r'; (*text)++ );
|
|
|
|
// If the string terminates return EOL
|
|
if( **text == '\0' )
|
|
{
|
|
*ttype = EOL;
|
|
return;
|
|
}
|
|
|
|
// SYMBOLS
|
|
if( (**text >='A' && **text <='Z') || (**text >='a' && **text <='z') )
|
|
{
|
|
*ttype = SYMBOL;
|
|
|
|
while(
|
|
(**text >='A' && **text <='Z') || (**text >='a' && **text <='z')
|
|
|| (**text >='0' && **text <='9')
|
|
)
|
|
{
|
|
*token++ = *(*text)++;
|
|
}
|
|
*token = '\0'; // Terminate the string.
|
|
return;
|
|
}
|
|
|
|
// STRING LITERALS
|
|
if( **text == '"' )
|
|
{
|
|
*ttype = STRLIT;
|
|
(*text)++; // Skip first quote.
|
|
while( **text != '"' && **text )
|
|
{
|
|
*token++ = *(*text)++;
|
|
}
|
|
(*text)++; // Skip last quote.
|
|
*token = '\0'; // Terminate the string.
|
|
return;
|
|
}
|
|
|
|
// NUMERICS
|
|
if( **text >= '0' && **text <= '9' )
|
|
{
|
|
*ttype = INTLIT;
|
|
while( **text >= '0' && **text <= '9' )
|
|
{
|
|
*token++ = *(*text)++;
|
|
if( **text == '.' )
|
|
{
|
|
*ttype = REALLIT;
|
|
*token++ = *(*text)++;
|
|
}
|
|
// I left out the 'e' notation part - we don't need it.
|
|
}
|
|
*token = '\0'; // Terminate the string.
|
|
return;
|
|
}
|
|
|
|
// PUNCTUATION SECTION
|
|
if( **text == '(' )
|
|
{
|
|
*ttype = LPAREN;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == ')' )
|
|
{
|
|
*ttype = RPAREN;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == ';' )
|
|
{
|
|
*ttype = SEMIC;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == ':' )
|
|
{
|
|
*ttype = COLON;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == ',' )
|
|
{
|
|
*ttype = COMMA;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == '.' )
|
|
{
|
|
*ttype = PERIOD;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == '\'' )
|
|
{
|
|
*ttype = APOST;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == '+' )
|
|
{
|
|
*ttype = PLUSOP;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == '-' )
|
|
{
|
|
*ttype = MINUSOP;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == '*' )
|
|
{
|
|
*ttype = MUXOP;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == '/' )
|
|
{
|
|
*ttype = DIVOP;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == '^' )
|
|
{
|
|
*ttype = POWOP;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == '=' )
|
|
{
|
|
*ttype = ASSIGNOP;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == '#' )
|
|
{
|
|
*ttype = HASH;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
if( **text == '!' )
|
|
{
|
|
*ttype = BANG;
|
|
*token++ = *(*text)++;
|
|
*token = '\0';
|
|
return;
|
|
}
|
|
|
|
|
|
// Nothing matched - it must be an error
|
|
*ttype = LEXERR;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
//*************************************************************************
|
|
// Sample driver to test this file with
|
|
//
|
|
|
|
void parseString( char *test )
|
|
{
|
|
char *sp, *tp;
|
|
int ttype;
|
|
char token[81];
|
|
|
|
sp = test; // Set up a pointer to the string;
|
|
ttype = UNDEF; // Signal "no match found yet"
|
|
|
|
// Now scan the string and report each token type and value found
|
|
while( ttype != EOL && ttype != LEXERR )
|
|
{
|
|
// Set tp to point to our return string location
|
|
tp = token;
|
|
// Pass in the correct values to scanner()
|
|
scanner( &sp, tp, &ttype );
|
|
// If we are not done, print what we found.
|
|
if( ttype != EOL )
|
|
{
|
|
printf( "Token type = %s, token = %s\n", nmtoken(ttype), token );
|
|
}
|
|
}
|
|
}
|
|
//**************************************************************************
|
|
*/ |