preprocessing flag for the lexer

This commit is contained in:
Wolfgang (Blub) Bumiller 2012-11-01 23:22:58 +01:00
parent 2ef0203d17
commit ab86b0470c
2 changed files with 88 additions and 29 deletions

111
lexer.c
View file

@ -294,6 +294,27 @@ static bool isxdigit_only(int ch)
return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
} }
/* Add the character `ch` to the value of the token currently being built.
 * Returns true on success. If token_value_add() reports failure, an
 * "out of memory" lexer error is raised and false is returned.
 */
static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
{
    if (token_value_add(&lex->tok, ch))
        return true;

    lexerror(lex, "out of memory");
    return false;
}
/* Terminate the current token value with a null-byte.
 * The terminator is stored via token_value_add() but value_count is
 * decremented again afterwards, so the count does not include it.
 * Returns true on success. If token_value_add() reports failure, an
 * "out of memory" lexer error is raised and false is returned.
 */
static bool GMQCC_WARN lex_endtoken(lex_file *lex)
{
    if (token_value_add(&lex->tok, 0)) {
        /* do not count the terminator as part of the value */
        lex->tok.value_count--;
        return true;
    }

    lexerror(lex, "out of memory");
    return false;
}
/* Skip whitespace and comments and return the first /* Skip whitespace and comments and return the first
* non-white character. * non-white character.
* As this makes use of the above getch() ungetch() functions, * As this makes use of the above getch() ungetch() functions,
@ -333,7 +354,27 @@ static int lex_skipwhite(lex_file *lex)
do do
{ {
ch = lex_getch(lex); ch = lex_getch(lex);
while (ch != EOF && isspace(ch)) ch = lex_getch(lex); while (ch != EOF && isspace(ch)) {
if (lex->flags.preprocessing) {
if (ch == '\n') {
/* end-of-line */
/* see if there was whitespace first */
if (lex->tok.value_count) {
lex_ungetch(lex, ch);
if (!lex_endtoken(lex))
return TOKEN_FATAL;
return TOKEN_WHITE;
}
/* otherwise return EOL */
return TOKEN_EOL;
}
if (!lex_tokench(lex, ch))
return TOKEN_FATAL;
}
ch = lex_getch(lex);
}
if (lex->flags.preprocessing && !lex_tokench(lex, ch))
return TOKEN_FATAL;
if (ch == '/') { if (ch == '/') {
ch = lex_getch(lex); ch = lex_getch(lex);
@ -342,30 +383,59 @@ static int lex_skipwhite(lex_file *lex)
/* one line comment */ /* one line comment */
ch = lex_getch(lex); ch = lex_getch(lex);
/* check for special: '/', '/', '*', '/' */ if (lex->flags.preprocessing) {
if (ch == '*') { if (!lex_tokench(lex, ' ') ||
ch = lex_getch(lex); !lex_tokench(lex, ' '))
if (ch == '/') { {
ch = ' '; return TOKEN_FATAL;
continue;
} }
} }
while (ch != EOF && ch != '\n') { while (ch != EOF && ch != '\n') {
ch = lex_getch(lex); ch = lex_getch(lex);
if (lex->flags.preprocessing && !lex_tokench(lex, ' '))
return TOKEN_FATAL;
}
if (lex->flags.preprocessing) {
lex_ungetch(lex, '\n');
if (!lex_endtoken(lex))
return TOKEN_FATAL;
return TOKEN_WHITE;
} }
continue; continue;
} }
if (ch == '*') if (ch == '*')
{ {
/* multiline comment */ /* multiline comment */
if (lex->flags.preprocessing) {
if (!lex_tokench(lex, ' ') ||
!lex_tokench(lex, ' '))
{
return TOKEN_FATAL;
}
}
while (ch != EOF) while (ch != EOF)
{ {
ch = lex_getch(lex); ch = lex_getch(lex);
if (ch == '*') { if (ch == '*') {
ch = lex_getch(lex); ch = lex_getch(lex);
if (ch == '/') if (ch == '/') {
if (lex->flags.preprocessing) {
if (!lex_tokench(lex, ' ') ||
!lex_tokench(lex, ' '))
{
return TOKEN_FATAL;
}
}
break; break;
}
}
if (lex->flags.preprocessing) {
if (ch != '\n')
ch = ' ';
if (!lex_tokench(lex, ch))
return TOKEN_FATAL;
} }
} }
ch = ' '; /* cause TRUE in the isspace check */ ch = ' '; /* cause TRUE in the isspace check */
@ -381,27 +451,6 @@ static int lex_skipwhite(lex_file *lex)
return ch; return ch;
} }
/* Add the character `ch` to the value of the token currently being built.
 * Returns true on success. If token_value_add() reports failure, an
 * "out of memory" lexer error is raised and false is returned.
 */
static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
{
    if (token_value_add(&lex->tok, ch))
        return true;

    lexerror(lex, "out of memory");
    return false;
}
/* Terminate the current token value with a null-byte.
 * The terminator is stored via token_value_add() but value_count is
 * decremented again afterwards, so the count does not include it.
 * Returns true on success. If token_value_add() reports failure, an
 * "out of memory" lexer error is raised and false is returned.
 */
static bool GMQCC_WARN lex_endtoken(lex_file *lex)
{
    if (token_value_add(&lex->tok, 0)) {
        /* do not count the terminator as part of the value */
        lex->tok.value_count--;
        return true;
    }

    lexerror(lex, "out of memory");
    return false;
}
/* Get a token */ /* Get a token */
static bool GMQCC_WARN lex_finish_ident(lex_file *lex) static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
{ {
@ -626,6 +675,10 @@ int lex_do(lex_file *lex)
lex->tok.ctx.line = lex->sline; lex->tok.ctx.line = lex->sline;
lex->tok.ctx.file = lex->name; lex->tok.ctx.file = lex->name;
/* In preprocessing mode, whitespace, end-of-line and fatal results are
 * returned directly as the token type.
 * NOTE(review): ch presumably holds the result of lex_skipwhite() here;
 * confirm against the part of lex_do() not shown in this hunk.
 * Every alternative must be compared against ch: a bare TOKEN_FATAL
 * operand is only an enum constant, and if it is non-zero it makes the
 * condition true for any ch, so ordinary characters would be returned
 * here as if they were token types.
 */
if (lex->flags.preprocessing &&
    (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL))
{
    return (lex->tok.ttype = ch);
}
if (lex->eof) if (lex->eof)
return (lex->tok.ttype = TOKEN_FATAL); return (lex->tok.ttype = TOKEN_FATAL);

View file

@ -57,6 +57,9 @@ enum {
TOKEN_INTCONST, TOKEN_INTCONST,
TOKEN_FLOATCONST, TOKEN_FLOATCONST,
TOKEN_WHITE,
TOKEN_EOL,
TOKEN_EOF, TOKEN_EOF,
/* We use '< TOKEN_ERROR', so TOKEN_FATAL must come after it and any /* We use '< TOKEN_ERROR', so TOKEN_FATAL must come after it and any
@ -78,6 +81,8 @@ static const char *_tokennames[] = {
"TOKEN_VECTORCONST", "TOKEN_VECTORCONST",
"TOKEN_INTCONST", "TOKEN_INTCONST",
"TOKEN_FLOATCONST", "TOKEN_FLOATCONST",
"TOKEN_WHITE",
"TOKEN_EOL",
"TOKEN_EOF", "TOKEN_EOF",
"TOKEN_ERROR", "TOKEN_ERROR",
"TOKEN_FATAL", "TOKEN_FATAL",
@ -109,6 +114,7 @@ typedef struct {
struct { struct {
bool noops; bool noops;
bool nodigraphs; /* used when lexing string constants */ bool nodigraphs; /* used when lexing string constants */
bool preprocessing; /* whitespace and EOLs become actual tokens */
} flags; } flags;
int framevalue; int framevalue;