I got parsing to work finally.

This commit is contained in:
Dale Weiler 2012-04-12 02:22:28 -04:00
parent 32d7728b2f
commit 9493da4f8b
10 changed files with 192 additions and 249 deletions

1
code.c
View file

@ -146,7 +146,6 @@ VECTOR_MAKE(prog_section_field, code_fields );
VECTOR_MAKE(prog_section_function, code_functions ); VECTOR_MAKE(prog_section_function, code_functions );
VECTOR_MAKE(int, code_globals ); VECTOR_MAKE(int, code_globals );
VECTOR_MAKE(char, code_strings ); VECTOR_MAKE(char, code_strings );
static uint16_t code_crc16 = 0;
prog_header code_header ={0}; prog_header code_header ={0};
void code_init() { void code_init() {

22
lex.c
View file

@ -48,7 +48,7 @@ struct lex_file *lex_open(FILE *fp) {
lex->size = lex->length; /* copy, this is never changed */ lex->size = lex->length; /* copy, this is never changed */
fseek(lex->file, 0, SEEK_SET); fseek(lex->file, 0, SEEK_SET);
lex->last = 0; lex->last = 0;
lex->line = 1; lex->line = 0;
memset(lex->peek, 0, sizeof(lex->peek)); memset(lex->peek, 0, sizeof(lex->peek));
return lex; return lex;
@ -139,15 +139,20 @@ static int lex_digraph(struct lex_file *file, int first) {
static int lex_getch(struct lex_file *file) { static int lex_getch(struct lex_file *file) {
int ch = lex_inget(file); int ch = lex_inget(file);
static int str = 0;
switch (ch) { switch (ch) {
case '?' : case '?' :
return lex_trigraph(file); return lex_trigraph(file);
case '<' : case '<' :
case ':' : case ':' :
case '%' : case '%' :
return lex_digraph (file, ch); case '"' : str = !str; if (str) { file->line ++; }
case '\n': file->line ++; return lex_digraph(file, ch);
case '\n':
if (!str)
file->line++;
} }
return ch; return ch;
@ -277,7 +282,14 @@ int lex_token(struct lex_file *file) {
/* valid identifier */ /* valid identifier */
if (ch > 0 && (ch == '_' || isalpha(ch))) { if (ch > 0 && (ch == '_' || isalpha(ch))) {
lex_clear(file); lex_clear(file);
while (ch > 0 && ch != ' ' && ch != '(' && ch != '\n' && ch != ';') {
/*
* Yes, this is dirty, but there is no other _sane_, easy
* way to do it. This is what I call defensive programming:
* if something breaks, add more defense :-)
*/
while (ch > 0 && ch != ' ' && ch != '(' &&
ch != '\n' && ch != ';' && ch != ')') {
lex_addch(ch, file); lex_addch(ch, file);
ch = lex_getsource(file); ch = lex_getsource(file);
} }

345
parse.c
View file

@ -230,30 +230,9 @@ int parse_tree(struct lex_file *file) {
token != ERROR_PARSE && \ token != ERROR_PARSE && \
token != ERROR_PREPRO && file->length >= 0) { token != ERROR_PREPRO && file->length >= 0) {
switch (token) { switch (token) {
case TOKEN_IF:
TOKEN_SKIPWHITE();
if (token != '(')
error(ERROR_PARSE, "%s:%d Expected `(` after `if` for if statement\n", file->name, file->line);
PARSE_TREE_ADD(PARSE_TYPE_IF);
PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
break;
case TOKEN_ELSE:
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_ELSE);
break;
case TOKEN_FOR:
while ((token == ' ' || token == '\n') && file->length >= 0)
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_FOR);
break;
/*
* This is a quick and easy way to do typedefs at parse time
* all power is in typedef_add(), in typedef.c. We handle
* the tokens accordingly here.
*/
case TOKEN_TYPEDEF: { case TOKEN_TYPEDEF: {
char *f,*t; char *f; /* from */
char *t; /* to */
token = lex_token(file); token = lex_token(file);
token = lex_token(file); f = util_strdup(file->lastok); token = lex_token(file); f = util_strdup(file->lastok);
@ -261,101 +240,192 @@ int parse_tree(struct lex_file *file) {
token = lex_token(file); t = util_strdup(file->lastok); token = lex_token(file); t = util_strdup(file->lastok);
typedef_add(f, t); typedef_add(f, t);
printf("TYPEDEF %s as %s\n", f, t);
mem_d(f); mem_d(f);
mem_d(t); mem_d(t);
//while (token != '\n')
token = lex_token(file); token = lex_token(file);
if (token == ' ')
token = lex_token(file);
if (token != ';') if (token != ';')
error(ERROR_PARSE, "%s:%d Expected `;` on typedef\n", file->name, file->line); error(ERROR_PARSE, "%s:%d Expected `;` on typedef\n", file->name, file->line);
token = lex_token(file); token = lex_token(file);
printf("TOK: %c\n", token);
break; break;
} }
/* case TOKEN_VOID: PARSE_TREE_ADD(PARSE_TYPE_VOID); goto fall;
* Returns are addable as-is, statement checking is during
* the actual parse tree check.
*/
case TOKEN_RETURN:
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_RETURN);
break;
case TOKEN_CONTINUE:
PARSE_TREE_ADD(PARSE_TYPE_CONTINUE);
break;
case TOKEN_DO: PARSE_PERFORM(PARSE_TYPE_DO, {});
case TOKEN_WHILE: PARSE_PERFORM(PARSE_TYPE_WHILE, {});
case TOKEN_BREAK: PARSE_PERFORM(PARSE_TYPE_BREAK, {});
case TOKEN_GOTO: PARSE_PERFORM(PARSE_TYPE_GOTO, {});
case TOKEN_VOID: PARSE_PERFORM(PARSE_TYPE_VOID, {});
case TOKEN_STRING: PARSE_TREE_ADD(PARSE_TYPE_STRING); goto fall; case TOKEN_STRING: PARSE_TREE_ADD(PARSE_TYPE_STRING); goto fall;
case TOKEN_VECTOR: PARSE_TREE_ADD(PARSE_TYPE_VECTOR); goto fall; case TOKEN_VECTOR: PARSE_TREE_ADD(PARSE_TYPE_VECTOR); goto fall;
case TOKEN_ENTITY: PARSE_TREE_ADD(PARSE_TYPE_ENTITY); goto fall; case TOKEN_ENTITY: PARSE_TREE_ADD(PARSE_TYPE_ENTITY); goto fall;
case TOKEN_FLOAT: PARSE_TREE_ADD(PARSE_TYPE_FLOAT); goto fall; case TOKEN_FLOAT: PARSE_TREE_ADD(PARSE_TYPE_FLOAT); goto fall;
/* fall into this for all types */
{ {
fall:; fall:;
char *name = NULL; char *name = NULL;
TOKEN_SKIPWHITE(); int type = token; /* store copy */
name = util_strdup(file->lastok);
token = lex_token (file);
/* is it NOT a definition? */ /* skip over space */
if (token != ';') { token = lex_token(file);
while (token == ' ') if (token == ' ')
token = lex_token(file);
/* save name */
name = util_strdup(file->lastok);
/* skip spaces */
token = lex_token(file);
if (token == ' ')
token = lex_token(file);
if (token == ';') {
printf("definition\n");
} else if (token == '=') {
token = lex_token(file);
if (token == ' ')
token = lex_token(file); token = lex_token(file);
/* it's a function? */ /* strings are in file->lastok */
if (token == '(') { switch (type) {
/* case TOKEN_VOID: return error(ERROR_PARSE, "%s:%d Cannot assign value to type void\n", file->name, file->line);
* Now I essentially have to do a ton of parsing for case TOKEN_STRING:
* function definition. if (*file->lastok != '"')
*/ error(ERROR_PARSE, "%s:%d Expected a '\"' for string constant\n", file->name, file->line);
PARSE_TREE_ADD(PARSE_TYPE_LPARTH); break;
token = lex_token(file); case TOKEN_VECTOR: {
while (token != '\n' && token != ')') { float compile_calc_x = 0;
switch (token) { float compile_calc_y = 0;
case TOKEN_VOID: PARSE_TREE_ADD(PARSE_TYPE_VOID); break; float compile_calc_z = 0;
case TOKEN_STRING: PARSE_TREE_ADD(PARSE_TYPE_STRING); break; int compile_calc_d = 0; /* dot? */
case TOKEN_ENTITY: PARSE_TREE_ADD(PARSE_TYPE_ENTITY); break;
case TOKEN_FLOAT: PARSE_TREE_ADD(PARSE_TYPE_FLOAT); break; char compile_data[1024];
/* char *compile_eval = compile_data;
* TODO: Need to parse function pointers: I have no clue how
* I'm actually going to pull that off, it's going to be hard if (token != '{')
* since you can have a function pointer-pointer-pointer .... error(ERROR_PARSE, "%s:%d Expected initializer list `{`,`}` for vector constant\n", file->name, file->line);
*/
} token = lex_token(file);
} if (token == ' ')
/* just a definition */ token = lex_token(file);
if (token == ')') {
/* /*
* I like to put my { on the same line as the ) for * we support .7623, unlike any other QuakeC
* functions, ifs, elses, so we must support that!. * compiler. Does that make us better :-).
*/ */
PARSE_TREE_ADD(PARSE_TYPE_RPARTH); if (token == '.')
compile_calc_d = 1;
if (!isdigit(token) && !compile_calc_d)
error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line);
/*
* Read in constant data, will be in float format
* which means we use atof.
*/
while (isdigit(token) || token == '.') {
*compile_eval++ = token;
token = lex_token(file);
if (token == '.' && compile_calc_d) {
error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line);
} else if (token == '.' && !compile_calc_d) {
compile_calc_d = 1;
}
}
if (token == ' ')
token = lex_token(file);
if (token != ',' && token != ' ')
error(ERROR_PARSE, "%s:%d invalid constant initializer element for vector (missing spaces, or comma delimited list?)\n", file->name, file->line);
compile_calc_x = atof(compile_data);
compile_calc_d = 0;
memset(compile_data, 0, sizeof(compile_data));
compile_eval = &compile_data[0];
token = lex_token(file); token = lex_token(file);
if (token == ' ')
token = lex_token(file);
if (token == '.')
compile_calc_d = 1;
if (!isdigit(token) && !compile_calc_d)
error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line);
/*
* Read in constant data, will be in float format
* which means we use atof.
*/
while (isdigit(token) || token == '.') {
*compile_eval++ = token;
token = lex_token(file);
if (token == '.' && compile_calc_d) {
error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line);
} else if (token == '.' && !compile_calc_d) {
compile_calc_d = 1;
}
}
if (token == ' ')
token = lex_token(file);
if (token != ',' && token != ' ')
error(ERROR_PARSE, "%s:%d invalid constant initializer element for vector (missing spaces, or comma delimited list?)\n", file->name, file->line);
compile_calc_y = atof(compile_data);
compile_calc_d = 0;
memset(compile_data, 0, sizeof(compile_data));
compile_eval = &compile_data[0];
token = lex_token(file); token = lex_token(file);
if(token == '{') if (token == ' ')
PARSE_TREE_ADD(PARSE_TYPE_LBS); token = lex_token(file);
if (token == '.')
compile_calc_d = 1;
if (!isdigit(token) && !compile_calc_d)
error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric\n", file->name, file->line);
/*
* Read in constant data, will be in float format
* which means we use atof.
*/
while (isdigit(token) || token == '.') {
*compile_eval++ = token;
token = lex_token(file);
if (token == '.' && compile_calc_d) {
error(ERROR_PARSE, "%s:%d Invalid constant initializer element for vector, must be numeric.\n", file->name, file->line);
} else if (token == '.' && !compile_calc_d) {
compile_calc_d = 1;
}
}
if (token == ' ')
token = lex_token(file);
if (token != '}')
error(ERROR_PARSE, "%s:%d Expected `}` on end of constant initialization for vector\n", file->name, file->line);
compile_calc_z = atof(compile_data);
/*
* Check for the semi-colon... The amount of
* parsing required here is insane.
*/
token = lex_token(file);
if (token == ' ')
token = lex_token(file);
if (token != ';')
error(ERROR_PARSE, "%s:%d Expected `;` on end of constant initialization for vector\n", file->name, file->line);
//printf("VEC_X: %f\n", compile_calc_x);
//printf("VEC_Y: %f\n", compile_calc_y);
//printf("VEC_X: %f\n", compile_calc_z);
break;
} }
else if (token == '\n')
error(ERROR_COMPILER, "%s:%d Expecting `;` after function definition %s\n", file->name, file->line, name); case TOKEN_ENTITY:
case TOKEN_FLOAT:
} else if (token == '=') { if (!isdigit(token))
PARSE_TREE_ADD(PARSE_TYPE_EQUAL); error(ERROR_PARSE, "%s:%d Expected numeric constant for float constant\n");
} else { break;
error(ERROR_COMPILER, "%s:%d Invalid decltype: expected `(` [function], or `=` [constant], or `;` [definition] for %s\n", file->name, file->line, name); }
} } else if (token == '(') {
} else { printf("FUNCTION ??\n");
/* definition */
printf("FOUND DEFINITION\n");
} }
mem_d(name); mem_d(name);
} }
@ -391,97 +461,6 @@ int parse_tree(struct lex_file *file) {
token = lex_token(file); token = lex_token(file);
break; break;
case '.':
PARSE_TREE_ADD(PARSE_TYPE_DOT);
break;
case '(':
PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
break;
case ')':
PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
break;
case '&': /* & */
token = lex_token(file);
if (token == '&') { /* && */
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_LAND);
break;
}
PARSE_TREE_ADD(PARSE_TYPE_BAND);
break;
case '|': /* | */
token = lex_token(file);
if (token == '|') { /* || */
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_LOR);
break;
}
PARSE_TREE_ADD(PARSE_TYPE_BOR);
break;
case '!': /* ! */
token = lex_token(file);
if (token == '=') { /* != */
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
break;
}
PARSE_TREE_ADD(PARSE_TYPE_LNOT);
break;
case '<': /* < */
token = lex_token(file);
if (token == '=') { /* <= */
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
break;
}
PARSE_TREE_ADD(PARSE_TYPE_LT);
break;
case '>': /* > */
token = lex_token(file);
if (token == '=') { /* >= */
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
break;
}
PARSE_TREE_ADD(PARSE_TYPE_GT);
break;
case '=': /* = */
token = lex_token(file);
if (token == '=') { /* == */
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
break;
}
PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
break;
case ';':
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_DONE);
break;
case '-':
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_MINUS);
break;
case '+':
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_ADD);
break;
case '{':
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_LBS);
break;
case '}':
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_RBS);
break;
/*
* TODO: Fix lexer to spit out ( ) as tokens, it seems the
* using '(' or ')' in parser doesn't work properly unless
* there are spaces before them to allow the lexer to properly
* separate identifiers. -- otherwise it eats all of it.
*/
case LEX_IDENT: case LEX_IDENT:
token = lex_token(file); token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_IDENT); PARSE_TREE_ADD(PARSE_TYPE_IDENT);

View file

@ -1,5 +0,0 @@
<%
<%
<% %>
%>
%>

View file

@ -1,9 +0,0 @@
float test_1data = 1;
float test_2data = 2;
float test_if()
{
if (test_1data == test_2data) {
/* do this code */
}
}

View file

@ -1,16 +1,10 @@
( void test_parth() {
( if (1) { }
( if (2) { }
) if (3) { }
) if (4) { }
) if (5) { }
( if (6) { }
( if (7) { }
) if (8) { }
) }
(
)
(
)
(
)

View file

@ -1,22 +0,0 @@
if(1) {
if(1) {
return 0;
} else {
return 1;
}
} else {
for {
if(1) {
return 2;
} else {
continue;
}
}
do {
if(1){
break;
} else {
goto finish;
}
} while ( )
}

View file

@ -1,5 +0,0 @@
??<
??<
??< ??>
??>
??>

View file

@ -4,7 +4,7 @@ typedef string my_string;
typedef entity my_entity; typedef entity my_entity;
typedef void my_void; typedef void my_void;
my_float type_float my_float type_float;
my_vector type_vector; my_vector type_vector;
my_string type_string; my_string type_string;
my_entity type_entity; my_entity type_entity;

View file

@ -1,5 +1,5 @@
float typef; float typef = 1;
vector typev; vector typev = {0,1,2};
string types; string types;
entity typee; entity typee;
void typev; void typev;