gmqcc/lex.c

354 lines
9.2 KiB
C

/*
* Copyright (C) 2012
* Dale Weiler
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "gmqcc.h"
/*
 * Reserved words known to the lexer.  Keywords are plain multi-character
 * words; punctuation lexing is a bit more complicated than keyword lexing.
 *
 * lex_token() reports a keyword by returning its index in this table, so
 * the order of the entries is significant.
 */
static const char *const lex_keywords[] = {
    "do",
    "else",
    "if",
    "while",
    "break",
    "continue",
    "return",
    "goto",
    "for",
    "typedef"
};
/*
 * Open `file` for lexing and hand a freshly allocated lexer state back
 * through `set`.
 *
 * The size of the file is measured once (seek to the end, ftell, seek
 * back to the start) and stored in both `length` and `size`.  All other
 * bookkeeping starts out empty: no pushed-back characters, an empty token
 * buffer, and the line counter at 1.
 *
 * On any failure (NULL argument, allocation, fopen, or measuring the
 * file) everything acquired so far is released and `*set` is left
 * untouched, so callers should initialise their pointer to NULL before
 * calling and test it afterwards.
 */
void lex_init(const char *file, lex_file **set) {
    lex_file *lex;
    long      size;

    if (!file || !set)
        return;

    lex = mem_a(sizeof(*lex));
    if (!lex)
        return;

    lex->file = fopen(file, "r");
    if (!lex->file) {
        mem_d(lex);
        return;
    }

    /* a stream whose size cannot be determined is treated like a failed open */
    if (fseek(lex->file, 0, SEEK_END) != 0 ||
        (size = ftell(lex->file)) < 0      ||
        fseek(lex->file, 0, SEEK_SET) != 0) {
        fclose(lex->file);
        mem_d(lex);
        return;
    }

    lex->length  = size;
    lex->size    = lex->length; /* copy, this is never changed */
    lex->last    = 0;
    lex->line    = 1;

    /*
     * The token buffer and its write index are read by lex_addch(); do
     * not rely on the allocator handing back zeroed memory.
     */
    lex->current = 0;
    memset(lex->peek,   0, sizeof(lex->peek));
    memset(lex->lastok, 0, sizeof(lex->lastok));

    *set = lex;
}
/*
 * Tear down a lexer: close its stream, then release the lexer state
 * itself.  Passing NULL is allowed and does nothing.
 */
void lex_close(lex_file *file) {
    if (file) {
        /* NOTE(review): the original author flags that the stream may already be closed here */
        fclose(file->file);
        mem_d(file);
    }
}
/*
 * Append one character to the token buffer (`lastok`).
 *
 * The last slot of the buffer is reserved for a terminator: once the
 * write index reaches it, further characters are silently dropped and
 * the slot is set to '\0'.
 */
static void lex_addch(int ch, lex_file *file) {
    const size_t limit = sizeof(file->lastok) - 1;

    if (file->current < limit) {
        file->lastok[file->current] = (char)ch;
        file->current++;
    }

    /* buffer is full (or just became full): keep it terminated */
    if (file->current == limit)
        file->lastok[limit] = '\0';
}
/*
 * Start a new token: rewind the write index of the token buffer to the
 * beginning.  The old contents are not wiped, only overwritten by the
 * characters added afterwards.
 */
static GMQCC_INLINE void lex_clear(lex_file *file) {
    file->current = 0;
}
/*
* Read (inget) or push back (unget) a single character on a lexer stream.
* Push-back doesn't play with the file stream itself; the lexer has
* its own internal state for this.
*/
/*
 * Fetch the next raw character for the lexer.
 *
 * Characters that were pushed back are handed out first, most recently
 * pushed first; the underlying stream is only read when none are
 * pending.  Every call decrements `length`, and the line counter is
 * bumped whenever the character handed out is a newline.
 *
 * Returns the character, or EOF once the stream is exhausted.
 */
static int lex_inget(lex_file *file) {
    /*
     * Must be an int, not a char: fgetc() returns an int precisely so
     * that EOF stays distinct from every valid byte.  Narrowing to char
     * makes EOF undetectable where char is unsigned and makes the byte
     * 0xFF look like EOF where char is signed.
     */
    int get;

    file->length--;

    if (file->last > 0)
        get = file->peek[--file->last];
    else
        get = fgetc(file->file);

    if (get == '\n')
        file->line++;

    return get;
}
/*
 * Push one character back so that the next lex_inget() returns it.
 *
 * Pushed-back characters are stacked in `peek`; pushing a newline undoes
 * the line-counter increment made when it was read.  If the stack is
 * already full the character is silently dropped.  `length` is
 * incremented in either case.
 */
static void lex_unget(int ch, lex_file *file) {
    /* capacity in elements, so the bound holds whatever the element type of peek is */
    const size_t slots = sizeof(file->peek) / sizeof(file->peek[0]);

    if ((size_t)file->last < slots) {
        if (ch == '\n')
            file->line--;
        file->peek[file->last++] = ch;
    }
    file->length++;
}
/*
* This is trigraph and digraph support, a feature no other QC compiler
* supports. Moving up in this world!
*/
/*
 * Finish decoding a possible trigraph; lex_getch() calls this after it
 * has consumed a single '?'.
 *
 * If the next two characters complete a trigraph ("??(" and friends) the
 * replacement character is returned.  Otherwise everything read here is
 * pushed back in its original order and a literal '?' is returned for
 * the question mark the caller consumed.
 */
static int lex_trigraph(lex_file *file) {
    int ch = lex_inget(file);

    if (ch != '?') {
        lex_unget(ch, file);
        return '?';
    }

    ch = lex_inget(file);
    switch (ch) {
        case '(' : return '[' ;
        case ')' : return ']' ;
        case '/' : return '\\';
        case '\'': return '^' ;
        case '<' : return '{' ;
        case '>' : return '}' ;
        case '!' : return '|' ;
        case '-' : return '~' ;
        case '=' : return '#' ;
        default:
            /*
             * Not a trigraph after all.  The push-back store is last-in
             * first-out, so the character read last has to go in first;
             * that way the second '?' is re-read before `ch`.
             */
            lex_unget(ch , file);
            lex_unget('?', file);
            break;
    }
    return '?';
}
/*
 * Decode a possible digraph whose first character, `first`, has already
 * been consumed.
 *
 *   <%  ->  {      %>  ->  }
 *   <:  ->  [      :>  ->  ]
 *   %:  ->  #
 *
 * Returns the replacement character on a match.  Otherwise the look-ahead
 * character is pushed back and `first` is returned unchanged.
 */
static int lex_digraph(lex_file *file, int first) {
    int next   = lex_inget(file);
    int mapped = 0; /* 0 means "no digraph"; no replacement is ever 0 */

    if (first == '<') {
        if (next == '%')
            mapped = '{';
        else if (next == ':')
            mapped = '[';
    } else if (first == '%') {
        if (next == '>')
            mapped = '}';
        else if (next == ':')
            mapped = '#';
    } else if (first == ':') {
        if (next == '>')
            mapped = ']';
    }

    if (mapped)
        return mapped;

    lex_unget(next, file);
    return first;
}
/*
 * Read one character with trigraph and digraph translation applied.
 * A '?' is handed to the trigraph decoder, and '<', ':' or '%' to the
 * digraph decoder; anything else is returned as read.
 */
static int lex_getch(lex_file *file) {
    const int ch = lex_inget(file);

    switch (ch) {
        case '?':
            return lex_trigraph(file);
        case '<':
        case ':':
        case '%':
            return lex_digraph(file, ch);
        default:
            return ch;
    }
}
/*
 * Read one character, collapsing runs of whitespace.
 *
 *  - A non-space character (or EOF) is returned as is.
 *  - A run of whitespace that reaches a newline yields '\n'.
 *  - Any other run of whitespace yields a single ' '; the character that
 *    ended the run is pushed back for the next read.
 */
static int lex_get(lex_file *file) {
    int ch = lex_getch(file);

    /*
     * The <ctype.h> classifiers are only defined for EOF and values
     * representable as unsigned char.  `ch` may be a negative value other
     * than EOF, so EOF is tested separately and everything else is
     * converted before it is classified.
     */
    if (ch == EOF || !isspace((unsigned char)ch))
        return ch;

    /* skip over all spaces, but stop at the end of the line */
    while (ch != '\n' && ch != EOF && isspace((unsigned char)ch))
        ch = lex_getch(file);

    if (ch == '\n')
        return ch;

    lex_unget(ch, file);
    return ' ';
}
/*
 * Lex a character literal; the opening quote has already been consumed.
 *
 * The literal text, including both quotes, is collected in the token
 * buffer.  At most two units are accepted between the quotes, where a
 * unit is one character or a backslash followed by one character.
 *
 * Returns LEX_CHRLIT on success and ERROR_LEX when the literal runs into
 * a newline or the end of the file, or when the closing quote does not
 * follow within that limit.
 */
static int lex_skipchr(lex_file *file) {
    int ch;
    int it;
    int closed = 0;

    lex_clear(file);
    lex_addch('\'', file);

    for (it = 0; it < 2; it++) {
        ch = lex_inget(file);
        if (ch == '\'') {
            closed = 1;
            break;
        }
        if (ch == EOF)
            return ERROR_LEX;

        lex_addch(ch, file);
        if (ch == '\n')
            return ERROR_LEX;
        if (ch == '\\')
            lex_addch(lex_getch(file), file);
    }

    /*
     * Both units were used without meeting the closing quote: it has to
     * be the very next character.  Consuming it here also keeps it from
     * being mistaken for the start of another literal.
     */
    if (!closed && lex_inget(file) != '\'')
        return ERROR_LEX;

    lex_addch('\'', file);
    lex_addch('\0', file);
    return LEX_CHRLIT;
}
/*
 * Lex a string literal; the opening double quote has already been
 * consumed.
 *
 * The literal text, including both quotes, is collected in the token
 * buffer.  A backslash takes the following raw character with it, so an
 * escaped quote does not end the string.
 *
 * Returns LEX_STRLIT on success, ERROR_LEX if a newline or the end of
 * the file is reached before the closing quote.
 */
static int lex_skipstr(lex_file *file) {
    int ch;

    lex_clear(file);
    lex_addch('"', file);

    for (;;) {
        ch = lex_getch(file);
        if (ch == '"')
            break;
        if (ch == '\n' || ch == EOF)
            return ERROR_LEX;

        lex_addch(ch, file);
        if (ch == '\\')
            lex_addch(lex_inget(file), file);
    }

    lex_addch('"', file);
    lex_addch('\0', file);
    return LEX_STRLIT;
}
/*
 * Decide what a '/' that has just been consumed starts, and lex it.
 *
 *  - "//" : line comment, running to the end of the line or of the file.
 *           A backslash takes the following character with it, so a
 *           backslash-newline continues the comment on the next line.
 *  - "/(star)" : block comment, running to the first star-slash pair.
 *  - anything else: the look-ahead is pushed back and '/' is returned.
 *
 * The comment text is collected in the token buffer; the buffer is
 * cleared even when the slash turns out not to start a comment.
 *
 * Returns LEX_COMMENT for a comment, '/' for a lone slash, or the result
 * of error() when a block comment is still open at the end of the file.
 */
static int lex_skipcmt(lex_file *file) {
    int ch;
    int prev;

    lex_clear(file);
    ch = lex_getch(file);

    if (ch == '/') {
        lex_addch('/', file);
        lex_addch('/', file);

        /* EOF ends the comment too, otherwise a last line without a newline never terminates */
        while ((ch = lex_getch(file)) != '\n' && ch != EOF) {
            lex_addch(ch, file);
            if (ch == '\\') {
                ch = lex_getch(file);
                if (ch == EOF)
                    break;
                lex_addch(ch, file);
            }
        }

        lex_addch('\0', file);
        return LEX_COMMENT;
    }

    if (ch != '*') {
        lex_unget(ch, file);
        return '/';
    }

    lex_addch('/', file);
    lex_addch('*', file);

    /*
     * Remember the previous character instead of hunting for a lone
     * star: this way a terminator preceded by extra stars is still
     * recognised, while the star that opened the comment cannot be
     * reused to close it.
     */
    prev = 0;
    for (;;) {
        ch = lex_getch(file);
        if (ch == EOF)
            return error(file, ERROR_LEX, "malformatted comment");

        lex_addch(ch, file);
        if (prev == '*' && ch == '/')
            break;
        prev = ch;
    }

    lex_addch('\0', file);
    return LEX_COMMENT;
}
/*
 * Read the next unit of source: a single (whitespace-collapsed)
 * character, or a whole character literal, string literal or comment.
 *
 * A quote, double quote or slash is handed to the matching skip routine
 * and that routine's result is returned; any other character is
 * returned as is.
 */
static int lex_getsource(lex_file *file) {
    const int ch = lex_get(file);

    if (ch == '\'')
        return lex_skipchr(file);
    if (ch == '"')
        return lex_skipstr(file);
    if (ch == '/')
        return lex_skipcmt(file);

    return ch;
}
/*
 * Produce the next token.
 *
 * Collapsed whitespace (' ') is skipped.  A word made of letters and
 * underscores is collected in the token buffer and classified, in this
 * order:
 *
 *   1. keyword       -> its index in lex_keywords
 *   2. built-in type -> TOKEN_FLOAT / TOKEN_VECTOR / TOKEN_STRING /
 *                       TOKEN_ENTITY / TOKEN_VOID
 *   3. typedef whose underlying name is a built-in type -> that TOKEN_*
 *   4. anything else -> LEX_IDENT
 *
 * Everything that does not start a word is returned exactly as
 * lex_getsource() delivered it.
 */
int lex_token(lex_file *file) {
    int    ch;
    size_t it;

    /* skip collapsed whitespace */
    do {
        ch = lex_getsource(file);
    } while (ch == ' ');

    /* not the start of an identifier: hand it straight back */
    if (!(ch > 0 && (ch == '_' || isalpha(ch))))
        return ch;

    /* valid identifier */
    lex_clear(file);
    do {
        lex_addch(ch, file);
        ch = lex_getsource(file);
    } while (ch > 0 && (ch == '_' || isalpha(ch)));
    lex_unget(ch, file);
    lex_addch('\0', file);

    /*
     * Look inside the table for a keyword.  The whole word has to match:
     * comparing only as many characters as the keyword is long would
     * turn every identifier that merely begins with a keyword into that
     * keyword.
     */
    for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++)
        if (!strcmp(file->lastok, lex_keywords[it]))
            return (int)it;

    /* try a type? */
    #define TEST_TYPE(X)                   \
        do {                               \
            if (!strcmp(X, "float"))       \
                return TOKEN_FLOAT;        \
            if (!strcmp(X, "vector"))      \
                return TOKEN_VECTOR;       \
            if (!strcmp(X, "string"))      \
                return TOKEN_STRING;       \
            if (!strcmp(X, "entity"))      \
                return TOKEN_ENTITY;       \
            if (!strcmp(X, "void"))        \
                return TOKEN_VOID;         \
        } while(0)

    TEST_TYPE(file->lastok);

    /* try the hashtable for typedefs? */
    if (typedef_find(file->lastok))
        TEST_TYPE(typedef_find(file->lastok)->name);

    #undef TEST_TYPE

    return LEX_IDENT;
}
/*
 * Rewind a lexer to the start of its file so the source can be lexed
 * again: the stream is repositioned, the remaining length is restored
 * from `size`, the line counter goes back to 1, and both the push-back
 * store and the token buffer are emptied.  Passing NULL does nothing.
 */
void lex_reset(lex_file *file) {
    if (!file)
        return;

    file->current = 0;
    file->last    = 0;
    file->length  = file->size;
    file->line    = 1; /* back at the top of the file, as after lex_init() */

    fseek(file->file, 0, SEEK_SET);

    memset(file->peek,   0, sizeof(file->peek  ));
    memset(file->lastok, 0, sizeof(file->lastok));
}
/*
 * Run the parser over a lexer.  Passing NULL does nothing.
 */
void lex_parse(lex_file *file) {
    if (file)
        parse_gen(file); /* run parser */
}
/*
 * Include a file into the lexer / parsing process.
 *
 * `file` is first passed through util_strrq() (presumably to strip the
 * surrounding quotes -- confirm against its definition).  A file that
 * names itself is rejected: an error is reported and the process exits.
 * Only this direct case is caught; a cycle through a second file is not
 * detected.
 *
 * Returns a new lexer for the included file, or NULL if it could not be
 * set up.
 */
lex_file *lex_include(lex_file *lex, const char *file) {
    lex_file *set = NULL;

    util_strrq(file);

    /*
     * Compare the complete names.  A comparison limited to the length of
     * lex->name would also reject every file whose name merely starts
     * with the current one, and everything at all when that name is
     * empty.
     */
    if (strcmp(lex->name, file) == 0) {
        error(lex, ERROR_LEX, "Source file cannot include itself\n");
        exit (-1);
    }

    lex_init(file, &set);
    return set;
}