mirror of
https://github.com/ZDoom/qzdoom-gpl.git
synced 2025-01-24 23:50:59 +00:00
341 lines
9.3 KiB
ReasonML
341 lines
9.3 KiB
ReasonML
|
/*
|
||
|
* A push-model scanner example for re2c -f
|
||
|
* Written Mon Apr 11 2005 by mgix@mgix.com
|
||
|
* This file is in the public domain.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
// ----------------------------------------------------------------------
|
||
|
|
||
|
#include <fcntl.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stddef.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#if defined(WIN32)
|
||
|
|
||
|
typedef signed char int8_t;
|
||
|
typedef signed short int16_t;
|
||
|
typedef signed int int32_t;
|
||
|
|
||
|
typedef unsigned char uint8_t;
|
||
|
typedef unsigned short uint16_t;
|
||
|
typedef unsigned int uint32_t;
|
||
|
|
||
|
#else
|
||
|
|
||
|
#include <stdint.h>
|
||
|
#include <unistd.h>
|
||
|
|
||
|
#ifndef O_BINARY
|
||
|
#define O_BINARY 0
|
||
|
#endif
|
||
|
|
||
|
#endif
|
||
|
|
||
|
// ----------------------------------------------------------------------
|
||
|
/*
 * Master list of token kinds (an "X-macro").
 *
 * Each user defines TOK(x) to the expansion it wants for one entry,
 * expands TOKENS, and then undefines TOK again. The name table below
 * and the PushScanner::Token enum are both generated from this list,
 * so the enum value of a token is always its index in tokenNames.
 */
#define TOKENS              \
                            \
    TOK(kEOF)               \
    TOK(kEOL)               \
    TOK(kUnknown)           \
    TOK(kIdentifier)        \
    TOK(kDecimalConstant)   \
                            \
    TOK(kEqual)             \
    TOK(kLeftParen)         \
    TOK(kRightParen)        \
    TOK(kMinus)             \
    TOK(kPlus)              \
    TOK(kStar)              \
    TOK(kSlash)             \
                            \
    TOK(kIf)                \
    TOK(kFor)               \
    TOK(kElse)              \
    TOK(kGoto)              \
    TOK(kBreak)             \
    TOK(kWhile)             \
    TOK(kReturn)

// ----------------------------------------------------------------------
/*
 * Printable name of every token, indexed by token value.
 * The table is read-only, so both the strings and the pointers are const.
 */
static const char *const tokenNames[] =
{
    #define TOK(x) #x,
        TOKENS
    #undef TOK
};
|
||
|
|
||
|
// ----------------------------------------------------------------------
|
||
|
class PushScanner
|
||
|
{
|
||
|
public:
|
||
|
|
||
|
enum Token
|
||
|
{
|
||
|
#define TOK(x) x,
|
||
|
TOKENS
|
||
|
#undef TOK
|
||
|
};
|
||
|
|
||
|
private:
|
||
|
|
||
|
bool eof;
|
||
|
int32_t state;
|
||
|
|
||
|
uint8_t *limit;
|
||
|
uint8_t *start;
|
||
|
uint8_t *cursor;
|
||
|
uint8_t *marker;
|
||
|
|
||
|
uint8_t *buffer;
|
||
|
uint8_t *bufferEnd;
|
||
|
|
||
|
uint8_t yych;
|
||
|
uint32_t yyaccept;
|
||
|
|
||
|
public:
|
||
|
|
||
|
// ----------------------------------------------------------------------
|
||
|
PushScanner()
|
||
|
{
|
||
|
limit = 0;
|
||
|
start = 0;
|
||
|
state = -1;
|
||
|
cursor = 0;
|
||
|
marker = 0;
|
||
|
buffer = 0;
|
||
|
eof = false;
|
||
|
bufferEnd = 0;
|
||
|
}
|
||
|
|
||
|
// ----------------------------------------------------------------------
|
||
|
~PushScanner()
|
||
|
{
|
||
|
}
|
||
|
|
||
|
// ----------------------------------------------------------------------
|
||
|
void send(
|
||
|
Token token
|
||
|
)
|
||
|
{
|
||
|
size_t tokenSize = cursor-start;
|
||
|
const char *tokenName = tokenNames[token];
|
||
|
printf(
|
||
|
"scanner is pushing out a token of type %d (%s)",
|
||
|
token,
|
||
|
tokenName
|
||
|
);
|
||
|
|
||
|
if(token==kEOF) putchar('\n');
|
||
|
else
|
||
|
{
|
||
|
size_t tokenNameSize = strlen(tokenNames[token]);
|
||
|
size_t padSize = 20-(20<tokenNameSize ? 20 : tokenNameSize);
|
||
|
for(size_t i=0; i<padSize; ++i) putchar(' ');
|
||
|
printf(" : ---->");
|
||
|
|
||
|
fwrite(
|
||
|
start,
|
||
|
tokenSize,
|
||
|
1,
|
||
|
stdout
|
||
|
);
|
||
|
|
||
|
printf("<----\n");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// ----------------------------------------------------------------------
|
||
|
uint32_t push(
|
||
|
const void *input,
|
||
|
ssize_t inputSize
|
||
|
)
|
||
|
{
|
||
|
printf(
|
||
|
"scanner is receiving a new data batch of length %d\n"
|
||
|
"scanner continues with saved state = %d\n",
|
||
|
inputSize,
|
||
|
state
|
||
|
);
|
||
|
|
||
|
/*
|
||
|
* Data source is signaling end of file when batch size
|
||
|
* is less than maxFill. This is slightly annoying because
|
||
|
* maxFill is a value that can only be known after re2c does
|
||
|
* its thing. Practically though, maxFill is never bigger than
|
||
|
* the longest keyword, so given our grammar, 32 is a safe bet.
|
||
|
*/
|
||
|
uint8_t null[64];
|
||
|
const ssize_t maxFill = 32;
|
||
|
if(inputSize<maxFill)
|
||
|
{
|
||
|
eof = true;
|
||
|
input = null;
|
||
|
inputSize = sizeof(null);
|
||
|
memset(null, 0, sizeof(null));
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* When we get here, we have a partially
|
||
|
* consumed buffer which is in the following state:
|
||
|
* last valid char last valid buffer spot
|
||
|
* v v
|
||
|
* +-------------------+-------------+---------------+-------------+----------------------+
|
||
|
* ^ ^ ^ ^ ^ ^
|
||
|
* buffer start marker cursor limit bufferEnd
|
||
|
*
|
||
|
* We need to stretch the buffer and concatenate the new chunk of input to it
|
||
|
*
|
||
|
*/
|
||
|
size_t used = limit-buffer;
|
||
|
size_t needed = used+inputSize;
|
||
|
size_t allocated = bufferEnd-buffer;
|
||
|
if(allocated<needed)
|
||
|
{
|
||
|
size_t limitOffset = limit-buffer;
|
||
|
size_t startOffset = start-buffer;
|
||
|
size_t markerOffset = marker-buffer;
|
||
|
size_t cursorOffset = cursor-buffer;
|
||
|
|
||
|
buffer = (uint8_t*)realloc(buffer, needed);
|
||
|
bufferEnd = needed+buffer;
|
||
|
|
||
|
marker = markerOffset + buffer;
|
||
|
cursor = cursorOffset + buffer;
|
||
|
start = buffer + startOffset;
|
||
|
limit = limitOffset + buffer;
|
||
|
}
|
||
|
memcpy(limit, input, inputSize);
|
||
|
limit += inputSize;
|
||
|
|
||
|
// The scanner starts here
|
||
|
#define YYLIMIT limit
|
||
|
#define YYCURSOR cursor
|
||
|
#define YYMARKER marker
|
||
|
#define YYCTYPE uint8_t
|
||
|
|
||
|
#define SKIP(x) { start = cursor; goto yy0; }
|
||
|
#define SEND(x) { send(x); SKIP(); }
|
||
|
#define YYFILL(n) { goto fill; }
|
||
|
|
||
|
#define YYGETSTATE() state
|
||
|
#define YYSETSTATE(x) { state = (x); }
|
||
|
|
||
|
start:
|
||
|
|
||
|
/*!re2c
|
||
|
|
||
|
eol = "\n";
|
||
|
eof = "\000";
|
||
|
digit = [0-9];
|
||
|
integer = digit+;
|
||
|
alpha = [A-Za-z_];
|
||
|
any = [\000-\0377];
|
||
|
space = [ \h\t\v\f\r];
|
||
|
|
||
|
"if" { SEND(kIf); }
|
||
|
"for" { SEND(kFor); }
|
||
|
"else" { SEND(kElse); }
|
||
|
"goto" { SEND(kGoto); }
|
||
|
"break" { SEND(kBreak); }
|
||
|
"while" { SEND(kWhile); }
|
||
|
"return" { SEND(kReturn); }
|
||
|
alpha (alpha|digit)* { SEND(kIdentifier); }
|
||
|
integer { SEND(kDecimalConstant);}
|
||
|
|
||
|
"=" { SEND(kEqual); }
|
||
|
"(" { SEND(kLeftParen); }
|
||
|
")" { SEND(kRightParen); }
|
||
|
"-" { SEND(kMinus); }
|
||
|
"+" { SEND(kPlus); }
|
||
|
"*" { SEND(kStar); }
|
||
|
"/" { SEND(kSlash); }
|
||
|
|
||
|
eol { SKIP(); }
|
||
|
space { SKIP(); }
|
||
|
eof { send(kEOF); return 1; }
|
||
|
any { SEND(kUnknown); }
|
||
|
*/
|
||
|
|
||
|
fill:
|
||
|
ssize_t unfinishedSize = cursor-start;
|
||
|
printf(
|
||
|
"scanner needs a refill. Exiting for now with:\n"
|
||
|
" saved fill state = %d\n"
|
||
|
" unfinished token size = %d\n",
|
||
|
state,
|
||
|
unfinishedSize
|
||
|
);
|
||
|
|
||
|
if(0<unfinishedSize && start<limit)
|
||
|
{
|
||
|
printf(" unfinished token is :");
|
||
|
fwrite(start, 1, cursor-start, stdout);
|
||
|
putchar('\n');
|
||
|
}
|
||
|
putchar('\n');
|
||
|
|
||
|
/*
|
||
|
* Once we get here, we can get rid of
|
||
|
* everything before start and after limit.
|
||
|
*/
|
||
|
if(eof==true) goto start;
|
||
|
if(buffer<start)
|
||
|
{
|
||
|
size_t startOffset = start-buffer;
|
||
|
memmove(buffer, start, limit-start);
|
||
|
marker -= startOffset;
|
||
|
cursor -= startOffset;
|
||
|
limit -= startOffset;
|
||
|
start -= startOffset;
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// ----------------------------------------------------------------------
|
||
|
int main(
|
||
|
int argc,
|
||
|
char **argv
|
||
|
)
|
||
|
{
|
||
|
// Parse cmd line
|
||
|
int input = 0;
|
||
|
if(1<argc)
|
||
|
{
|
||
|
input = open(argv[1], O_RDONLY | O_BINARY);
|
||
|
if(input<0)
|
||
|
{
|
||
|
fprintf(
|
||
|
stderr,
|
||
|
"could not open file %s\n",
|
||
|
argv[1]
|
||
|
);
|
||
|
exit(1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Tokenize input file by pushing batches
|
||
|
* of data one by one into the scanner.
|
||
|
*/
|
||
|
const size_t batchSize = 256;
|
||
|
uint8_t buffer[batchSize];
|
||
|
PushScanner scanner;
|
||
|
while(1)
|
||
|
{
|
||
|
ssize_t n = read(input, buffer, batchSize);
|
||
|
scanner.push(buffer, n);
|
||
|
if(n<batchSize) break;
|
||
|
}
|
||
|
scanner.push(0, -1);
|
||
|
close(input);
|
||
|
|
||
|
// Done
|
||
|
return 0;
|
||
|
}
|
||
|
|