qzdoom-gpl/tools/re2c/examples/push_model/push.re
2016-02-06 20:58:35 -06:00

340 lines
9.3 KiB
C++

/*
* A push-model scanner example for re2c -f
* Written Mon Apr 11 2005 by mgix@mgix.com
* This file is in the public domain.
*
*/
// ----------------------------------------------------------------------
#include <fcntl.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#if defined(WIN32)
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
#else
#include <stdint.h>
#include <unistd.h>
#ifndef O_BINARY
#define O_BINARY 0
#endif
#endif
// ----------------------------------------------------------------------
/*
 * Master list of token kinds (an X-macro).
 *
 * TOKENS expands to one TOK(name) invocation per token. Users define TOK
 * before expanding TOKENS and undefine it afterwards, so that several
 * parallel tables can be generated from this single list.
 */
#define TOKENS                  \
    /* generic tokens */        \
    TOK(kEOF)                   \
    TOK(kEOL)                   \
    TOK(kUnknown)               \
    TOK(kIdentifier)            \
    TOK(kDecimalConstant)       \
    /* punctuation */           \
    TOK(kEqual)                 \
    TOK(kLeftParen)             \
    TOK(kRightParen)            \
    TOK(kMinus)                 \
    TOK(kPlus)                  \
    TOK(kStar)                  \
    TOK(kSlash)                 \
    /* keywords */              \
    TOK(kIf)                    \
    TOK(kFor)                   \
    TOK(kElse)                  \
    TOK(kGoto)                  \
    TOK(kBreak)                 \
    TOK(kWhile)                 \
    TOK(kReturn)

// ----------------------------------------------------------------------

/*
 * Printable token names, one string per TOKENS entry and in the same
 * order: each entry is the token's identifier turned into a string.
 */
#define TOK(name) #name,
static const char *tokenNames[] = { TOKENS };
#undef TOK
// ----------------------------------------------------------------------
/*
 * Push-model scanner.
 *
 * The caller feeds input in batches through push(). Each batch is appended
 * to an internal, growable buffer and scanned; every recognized token is
 * reported through send(). When the scanner needs more input than is
 * buffered, push() returns and scanning resumes on the next call from the
 * state saved via YYSETSTATE.
 */
class PushScanner
{
public:

    // Token kinds. The enumerators are generated from the TOKENS list, so a
    // Token value is also the index of its name in tokenNames.
    enum Token
    {
        #define TOK(x) x,
            TOKENS
        #undef TOK
    };

private:

    bool        eof;        // set once a batch shorter than maxFill was pushed
    int32_t     state;      // value exchanged through YYGETSTATE/YYSETSTATE; starts at -1

    uint8_t     *limit;     // one past the last valid byte in buffer (YYLIMIT)
    uint8_t     *start;     // first byte of the token currently being scanned
    uint8_t     *cursor;    // current scan position (YYCURSOR)
    uint8_t     *marker;    // YYMARKER
    uint8_t     *buffer;    // start of the heap block owned by this object
    uint8_t     *bufferEnd; // one past the end of the heap block

    // NOTE(review): yych/yyaccept are not referenced by the hand-written code;
    // presumably the code re2c generates uses them as members so that their
    // values survive between push() calls -- confirm against re2c's output.
    uint8_t     yych;
    uint32_t    yyaccept;

    // Not copyable: the object owns `buffer`, and a member-wise copy would
    // make two objects free the same block.
    PushScanner(const PushScanner &);
    PushScanner &operator=(const PushScanner &);

public:

    // ----------------------------------------------------------------------
    // Creates a scanner with no buffer and no saved state.
    PushScanner()
    {
        limit = 0;
        start = 0;
        state = -1;
        cursor = 0;
        marker = 0;
        buffer = 0;
        eof = false;
        bufferEnd = 0;
        yych = 0;
        yyaccept = 0;
    }

    // ----------------------------------------------------------------------
    // Releases the buffer grown by push().
    ~PushScanner()
    {
        free(buffer);
    }

    // ----------------------------------------------------------------------
    /*
     * Reports one token on stdout.
     *
     * Prints the numeric token kind and its name; for every kind other than
     * kEOF it also prints the token text, i.e. the bytes in [start, cursor).
     */
    void send(
        Token token
    )
    {
        size_t tokenSize = cursor-start;
        const char *tokenName = tokenNames[token];
        printf(
            "scanner is pushing out a token of type %d (%s)",
            (int)token,
            tokenName
        );

        if(token==kEOF) putchar('\n');
        else
        {
            // Pad the name to 20 columns; names longer than that get no padding.
            size_t tokenNameSize = strlen(tokenName);
            size_t padSize = 20-(20<tokenNameSize ? 20 : tokenNameSize);
            for(size_t i=0; i<padSize; ++i) putchar(' ');
            printf(" : ---->");

            fwrite(
                start,
                tokenSize,
                1,
                stdout
            );

            printf("<----\n");
        }
    }

    // ----------------------------------------------------------------------
    /*
     * Appends a batch of input to the buffer and scans as far as possible.
     *
     * input     : bytes to append; may be null when inputSize <= 0.
     * inputSize : number of bytes at `input`. A size smaller than maxFill
     *             (including 0 or a negative value) tells the scanner that
     *             the input has ended.
     *
     * Returns 1 once the kEOF token has been sent, and 0 when the scanner
     * stopped because it needs another batch. Exits the process if the
     * buffer cannot be grown.
     */
    uint32_t push(
        const void  *input,
        ssize_t     inputSize
    )
    {
        // ssize_t is wider than int on LP64 targets, hence the cast and %ld.
        printf(
            "scanner is receiving a new data batch of length %ld\n"
            "scanner continues with saved state = %d\n",
            (long)inputSize,
            state
        );

        /*
         * Data source is signaling end of file when batch size
         * is less than maxFill. This is slightly annoying because
         * maxFill is a value that can only be known after re2c does
         * its thing. Practically though, maxFill is never bigger than
         * the longest keyword, so given our grammar, 32 is a safe bet.
         *
         * On end of file the buffer is terminated with a run of NUL bytes
         * (twice maxFill), which is what the `eof` rule below matches. Any
         * real bytes carried by that last short batch are appended first so
         * that they are scanned rather than lost.
         */
        const ssize_t maxFill = 32;
        size_t dataSize = (0<inputSize && input!=0) ? (size_t)inputSize : 0;
        size_t padSize = 0;
        if(inputSize<maxFill)
        {
            eof = true;
            padSize = 64;
        }

        /*
         * When we get here, we have a partially
         * consumed buffer which is in the following state:
         *                                                                last valid char        last valid buffer spot
         *                                                                v                      v
         * +-------------------+-------------+---------------+-------------+----------------------+
         * ^                   ^             ^               ^             ^                      ^
         * buffer              start         marker          cursor        limit                  bufferEnd
         *
         * We need to stretch the buffer and concatenate the new chunk of input to it
         *
         */
        size_t used = limit-buffer;
        size_t needed = used+dataSize+padSize;
        size_t allocated = bufferEnd-buffer;
        if(allocated<needed)
        {
            // realloc may move the block: remember every pointer as an offset
            // and rebuild it from the new base address.
            size_t limitOffset = limit-buffer;
            size_t startOffset = start-buffer;
            size_t markerOffset = marker-buffer;
            size_t cursorOffset = cursor-buffer;

            uint8_t *grown = (uint8_t*)realloc(buffer, needed);
            if(grown==0)
            {
                fprintf(
                    stderr,
                    "scanner could not allocate %lu bytes\n",
                    (unsigned long)needed
                );
                exit(1);
            }

            buffer = grown;
            bufferEnd = needed+buffer;
            marker = markerOffset + buffer;
            cursor = cursorOffset + buffer;
            start = buffer + startOffset;
            limit = limitOffset + buffer;
        }
        if(0<dataSize)
        {
            memcpy(limit, input, dataSize);
            limit += dataSize;
        }
        if(0<padSize)
        {
            memset(limit, 0, padSize);
            limit += padSize;
        }

        // The scanner starts here
        #define YYLIMIT         limit
        #define YYCURSOR        cursor
        #define YYMARKER        marker
        #define YYCTYPE         uint8_t

        // SKIP drops the text scanned so far and starts the next token;
        // SEND reports the token first. YYFILL leaves the scanner through
        // the `fill` label below instead of refilling in place.
        #define SKIP(x)         { start = cursor; goto yy0; }
        #define SEND(x)         { send(x); SKIP();          }
        #define YYFILL(n)       { goto fill;                }

        #define YYGETSTATE()    state
        #define YYSETSTATE(x)   { state = (x);  }

    start:

        /*!re2c
        re2c:startlabel = 1;

        eol = "\n";
        eof = "\000";
        digit = [0-9];
        integer = digit+;
        alpha = [A-Za-z_];
        any = [\000-\377];
        space = [ \h\t\v\f\r];

        "if"                    { SEND(kIf);             }
        "for"                   { SEND(kFor);            }
        "else"                  { SEND(kElse);           }
        "goto"                  { SEND(kGoto);           }
        "break"                 { SEND(kBreak);          }
        "while"                 { SEND(kWhile);          }
        "return"                { SEND(kReturn);         }
        alpha (alpha|digit)*    { SEND(kIdentifier);     }
        integer                 { SEND(kDecimalConstant);}

        "="                     { SEND(kEqual);          }
        "("                     { SEND(kLeftParen);      }
        ")"                     { SEND(kRightParen);     }
        "-"                     { SEND(kMinus);          }
        "+"                     { SEND(kPlus);           }
        "*"                     { SEND(kStar);           }
        "/"                     { SEND(kSlash);          }

        eol                     { SKIP();                }
        space                   { SKIP();                }
        eof                     { send(kEOF); return 1;  }
        any                     { SEND(kUnknown);        }
        */

    fill:
        ssize_t unfinishedSize = cursor-start;
        printf(
            "scanner needs a refill. Exiting for now with:\n"
            "    saved fill state = %d\n"
            "    unfinished token size = %ld\n",
            state,
            (long)unfinishedSize
        );

        if(0<unfinishedSize && start<limit)
        {
            printf("    unfinished token is :");
            fwrite(start, 1, cursor-start, stdout);
            putchar('\n');
        }
        putchar('\n');

        /*
         * Once we get here, we can get rid of
         * everything before start and after limit.
         */

        // After end of input no further batch will arrive, so instead of
        // returning, re-enter the scanner with the state it just saved.
        if(eof==true) goto start;

        // Slide the unconsumed bytes [start, limit) down to the front of the
        // buffer and shift every pointer by the same distance.
        if(buffer<start)
        {
            size_t startOffset = start-buffer;
            memmove(buffer, start, limit-start);
            marker -= startOffset;
            cursor -= startOffset;
            limit -= startOffset;
            start -= startOffset;
        }
        return 0;
    }
};
// ----------------------------------------------------------------------
/*
 * Tokenizes the file named by argv[1], or standard input when no argument
 * is given, by pushing fixed-size batches into a PushScanner.
 *
 * Returns 0 on success and 1 if reading the input failed; exits with
 * status 1 if the file cannot be opened.
 */
int main(
    int     argc,
    char    **argv
)
{
    // Parse cmd line: descriptor 0 (standard input) is used by default
    int input = 0;
    if(1<argc)
    {
        input = open(argv[1], O_RDONLY | O_BINARY);
        if(input<0)
        {
            fprintf(
                stderr,
                "could not open file %s\n",
                argv[1]
            );
            exit(1);
        }
    }

    /*
     * Tokenize input file by pushing batches
     * of data one by one into the scanner.
     */
    const size_t batchSize = 256;
    uint8_t buffer[batchSize];
    PushScanner scanner;

    int status = 0;
    bool sentEOF = false;   // did the most recent push() report end of input?
    while(1)
    {
        ssize_t n = read(input, buffer, batchSize);
        if(n<0)
        {
            // Checked before the size comparison below: a bare -1 compared
            // against the unsigned batchSize would convert to a huge value
            // and the loop would never terminate.
            fprintf(stderr, "could not read input\n");
            status = 1;
            break;
        }

        sentEOF = (scanner.push(buffer, n)!=0);
        if(n<(ssize_t)batchSize) break;
    }

    // Signal end of input, unless the last batch already made the scanner
    // report it (push() returns non-zero after sending kEOF).
    if(!sentEOF) scanner.push(0, -1);
    close(input);

    // Done
    return status;
}