diff --git a/lexer.c b/lexer.c index 02a7ff4..eb215f4 100644 --- a/lexer.c +++ b/lexer.c @@ -294,6 +294,27 @@ static bool isxdigit_only(int ch) return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); } +/* Append a character to the token buffer */ +static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch) +{ + if (!token_value_add(&lex->tok, ch)) { + lexerror(lex, "out of memory"); + return false; + } + return true; +} + +/* Append a trailing null-byte */ +static bool GMQCC_WARN lex_endtoken(lex_file *lex) +{ + if (!token_value_add(&lex->tok, 0)) { + lexerror(lex, "out of memory"); + return false; + } + lex->tok.value_count--; + return true; +} + /* Skip whitespace and comments and return the first * non-white character. * As this makes use of the above getch() ungetch() functions, @@ -333,7 +354,27 @@ static int lex_skipwhite(lex_file *lex) do { ch = lex_getch(lex); - while (ch != EOF && isspace(ch)) ch = lex_getch(lex); + while (ch != EOF && isspace(ch)) { + if (lex->flags.preprocessing) { + if (ch == '\n') { + /* end-of-line */ + /* see if there was whitespace first */ + if (lex->tok.value_count) { + lex_ungetch(lex, ch); + if (!lex_endtoken(lex)) + return TOKEN_FATAL; + return TOKEN_WHITE; + } + /* otherwise return EOL */ + return TOKEN_EOL; + } + if (!lex_tokench(lex, ch)) + return TOKEN_FATAL; + } + ch = lex_getch(lex); + } + if (lex->flags.preprocessing && !lex_tokench(lex, ch)) + return TOKEN_FATAL; if (ch == '/') { ch = lex_getch(lex); @@ -342,30 +383,59 @@ static int lex_skipwhite(lex_file *lex) /* one line comment */ ch = lex_getch(lex); - /* check for special: '/', '/', '*', '/' */ - if (ch == '*') { - ch = lex_getch(lex); - if (ch == '/') { - ch = ' '; - continue; + if (lex->flags.preprocessing) { + if (!lex_tokench(lex, ' ') || + !lex_tokench(lex, ' ')) + { + return TOKEN_FATAL; } } while (ch != EOF && ch != '\n') { ch = lex_getch(lex); + if (lex->flags.preprocessing && !lex_tokench(lex, ' ')) + return TOKEN_FATAL; + } + if (lex->flags.preprocessing) { + lex_ungetch(lex, '\n'); + if (!lex_endtoken(lex)) + return TOKEN_FATAL; + return TOKEN_WHITE; } continue; } if (ch == '*') { /* multiline comment */ + if (lex->flags.preprocessing) { + if (!lex_tokench(lex, ' ') || + !lex_tokench(lex, ' ')) + { + return TOKEN_FATAL; + } + } + while (ch != EOF) { ch = lex_getch(lex); if (ch == '*') { ch = lex_getch(lex); - if (ch == '/') + if (ch == '/') { + if (lex->flags.preprocessing) { + if (!lex_tokench(lex, ' ') || + !lex_tokench(lex, ' ')) + { + return TOKEN_FATAL; + } + } break; + } + } + if (lex->flags.preprocessing) { + if (ch != '\n') + ch = ' '; + if (!lex_tokench(lex, ch)) + return TOKEN_FATAL; } } ch = ' '; /* cause TRUE in the isspace check */ @@ -381,27 +451,6 @@ static int lex_skipwhite(lex_file *lex) return ch; } -/* Append a character to the token buffer */ -static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch) -{ - if (!token_value_add(&lex->tok, ch)) { - lexerror(lex, "out of memory"); - return false; - } - return true; -} - -/* Append a trailing null-byte */ -static bool GMQCC_WARN lex_endtoken(lex_file *lex) -{ - if (!token_value_add(&lex->tok, 0)) { - lexerror(lex, "out of memory"); - return false; - } - lex->tok.value_count--; - return true; -} - /* Get a token */ static bool GMQCC_WARN lex_finish_ident(lex_file *lex) { @@ -626,6 +675,10 @@ int lex_do(lex_file *lex) lex->tok.ctx.line = lex->sline; lex->tok.ctx.file = lex->name; + if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || TOKEN_FATAL)) { + return (lex->tok.ttype = ch); + } + if (lex->eof) return (lex->tok.ttype = TOKEN_FATAL); diff --git a/lexer.h b/lexer.h index 8d525a5..2142da6 100644 --- a/lexer.h +++ b/lexer.h @@ -57,6 +57,9 @@ enum { TOKEN_INTCONST, TOKEN_FLOATCONST, + TOKEN_WHITE, + TOKEN_EOL, + TOKEN_EOF, /* We use '< TOKEN_ERROR', so TOKEN_FATAL must come after it and any @@ -78,6 +81,8 @@ static const char *_tokennames[] = { "TOKEN_VECTORCONST", "TOKEN_INTCONST", "TOKEN_FLOATCONST", + "TOKEN_WHITE", + "TOKEN_EOL", "TOKEN_EOF", "TOKEN_ERROR", "TOKEN_FATAL", @@ -109,6 +114,7 @@ typedef struct { struct { bool noops; bool nodigraphs; /* used when lexing string constants */ + bool preprocessing; /* whitespace and EOLs become actual tokens */ } flags; int framevalue;