gmqcc/parse.c
2012-04-11 19:41:04 -04:00

492 lines
15 KiB
C

/*
* Copyright (C) 2012
* Dale Weiler
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "gmqcc.h"
/*
* These are not lexical tokens: These are parse tree types. Most people
* perform tokenizing on language punctuation which is wrong. That stuff
* is technically already tokenized, it just needs to be parsed into a tree
*/
/*
 * Parse node kinds stored in parsenode::type.  The values are consecutive
 * starting at 0 and are spelled out explicitly so that inserting a new
 * kind cannot silently renumber the others.
 */
enum {
    /* statement and control-flow keywords */
    PARSE_TYPE_DO       = 0,
    PARSE_TYPE_ELSE     = 1,
    PARSE_TYPE_IF       = 2,
    PARSE_TYPE_WHILE    = 3,
    PARSE_TYPE_BREAK    = 4,
    PARSE_TYPE_CONTINUE = 5,
    PARSE_TYPE_RETURN   = 6,
    PARSE_TYPE_GOTO     = 7,
    PARSE_TYPE_FOR      = 8,

    /* declaration type keywords */
    PARSE_TYPE_VOID     = 9,
    PARSE_TYPE_STRING   = 10,
    PARSE_TYPE_FLOAT    = 11,
    PARSE_TYPE_VECTOR   = 12,
    PARSE_TYPE_ENTITY   = 13,

    /* punctuation and operators */
    PARSE_TYPE_LAND     = 14,
    PARSE_TYPE_LOR      = 15,
    PARSE_TYPE_LTEQ     = 16,
    PARSE_TYPE_GTEQ     = 17,
    PARSE_TYPE_EQEQ     = 18,
    PARSE_TYPE_LNEQ     = 19,
    PARSE_TYPE_COMMA    = 20,
    PARSE_TYPE_LNOT     = 21,
    PARSE_TYPE_STAR     = 22,
    PARSE_TYPE_DIVIDE   = 23,
    PARSE_TYPE_LPARTH   = 24,
    PARSE_TYPE_RPARTH   = 25,
    PARSE_TYPE_MINUS    = 26,
    PARSE_TYPE_ADD      = 27,
    PARSE_TYPE_EQUAL    = 28,
    PARSE_TYPE_LBS      = 29,
    PARSE_TYPE_RBS      = 30,
    PARSE_TYPE_ELIP     = 31,
    PARSE_TYPE_DOT      = 32,
    PARSE_TYPE_LT       = 33,
    PARSE_TYPE_GT       = 34,
    PARSE_TYPE_BAND     = 35,
    PARSE_TYPE_BOR      = 36,

    /* statement terminator and identifiers */
    PARSE_TYPE_DONE     = 37,
    PARSE_TYPE_IDENT    = 38
};
/*
* Adds a parse type to the parse tree, this is where all the hard
* work actually begins.
*/
/*
 * Appends a node of kind X after the node `parsetree` points at and then
 * advances `parsetree` to the new node.  Expects a variable named
 * `parsetree` (struct parsenode *) in the expanding scope.
 * NOTE(review): the result of mem_a() is not checked before it is used.
 */
#define PARSE_TREE_ADD(X)                                              \
    do {                                                               \
        struct parsenode *parse_added_ = mem_a(sizeof *parse_added_);  \
        parse_added_->type = (X);                                      \
        parse_added_->next = NULL;                                     \
        parsetree->next    = parse_added_;                             \
        parsetree          = parse_added_;                             \
    } while (0)
/*
* This is all the punctuation handled in the parser, these don't
* need tokens, they're already tokens.
*/
#if 0
"&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
"/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
"." , "<" , ">" , "&" , "|" ,
#endif
/*
 * Emits one line of debug output from inside a `switch` case:
 *   1. writes `fill` spaces of indentation,
 *   2. applies the adjustment C to `fill` (e.g. `+=4`, `-=4`, `-=0`),
 *   3. writes the text X,
 *   4. leaves the enclosing switch via `break`.
 * Expects a `long fill` variable in the expanding scope.
 *
 * The indentation loop stops at zero: `fill` becomes negative as soon as
 * a closing block has no matching opener, and counting a negative value
 * down would not terminate in any reasonable time.
 * X is written with fputs() so it is never interpreted as a format string.
 */
#define STORE(X,C) {                \
    long f = fill;                  \
    while (f-- > 0) {               \
        putchar(' ');               \
    }                               \
    fill C;                         \
    fputs((X), stdout);             \
    break;                          \
}
void parse_debug(struct parsenode *tree) {
long fill = 0;
while (tree) {
switch (tree->type) {
case PARSE_TYPE_ADD: STORE("OPERATOR: ADD \n", -=0);
case PARSE_TYPE_BAND: STORE("OPERATOR: BITAND \n",-=0);
case PARSE_TYPE_BOR: STORE("OPERATOR: BITOR \n",-=0);
case PARSE_TYPE_COMMA: STORE("OPERATOR: SEPERATOR\n",-=0);
case PARSE_TYPE_DOT: STORE("OPERATOR: DOT\n",-=0);
case PARSE_TYPE_DIVIDE: STORE("OPERATOR: DIVIDE\n",-=0);
case PARSE_TYPE_EQUAL: STORE("OPERATOR: ASSIGNMENT\n",-=0);
case PARSE_TYPE_BREAK: STORE("STATEMENT: BREAK \n",-=0);
case PARSE_TYPE_CONTINUE: STORE("STATEMENT: CONTINUE\n",-=0);
case PARSE_TYPE_GOTO: STORE("STATEMENT: GOTO\n",-=0);
case PARSE_TYPE_RETURN: STORE("STATEMENT: RETURN\n",-=0);
case PARSE_TYPE_DONE: STORE("STATEMENT: DONE\n",-=0);
case PARSE_TYPE_VOID: STORE("DECLTYPE: VOID\n",-=0);
case PARSE_TYPE_STRING: STORE("DECLTYPE: STRING\n",-=0);
case PARSE_TYPE_ELIP: STORE("DECLTYPE: VALIST\n",-=0);
case PARSE_TYPE_ENTITY: STORE("DECLTYPE: ENTITY\n",-=0);
case PARSE_TYPE_FLOAT: STORE("DECLTYPE: FLOAT\n",-=0);
case PARSE_TYPE_VECTOR: STORE("DECLTYPE: VECTOR\n",-=0);
case PARSE_TYPE_GT: STORE("TEST: GREATER THAN\n",-=0);
case PARSE_TYPE_LT: STORE("TEST: LESS THAN\n",-=0);
case PARSE_TYPE_GTEQ: STORE("TEST: GREATER THAN OR EQUAL\n",-=0);
case PARSE_TYPE_LTEQ: STORE("TEST: LESS THAN OR EQUAL\n",-=0);
case PARSE_TYPE_LNEQ: STORE("TEST: NOT EQUAL\n",-=0);
case PARSE_TYPE_EQEQ: STORE("TEST: EQUAL-EQUAL\n",-=0);
case PARSE_TYPE_LBS: STORE("BLOCK: BEG\n",+=4);
case PARSE_TYPE_RBS: STORE("BLOCK: END\n",-=4);
case PARSE_TYPE_ELSE: STORE("BLOCK: ELSE\n",+=0);
case PARSE_TYPE_IF: STORE("BLOCK: IF\n",+=0);
case PARSE_TYPE_LAND: STORE("LOGICAL: AND\n",-=0);
case PARSE_TYPE_LNOT: STORE("LOGICAL: NOT\n",-=0);
case PARSE_TYPE_LOR: STORE("LOGICAL: OR\n",-=0);
case PARSE_TYPE_LPARTH: STORE("PARTH: BEG\n",-=0);
case PARSE_TYPE_RPARTH: STORE("PARTH: END\n",-=0);
case PARSE_TYPE_WHILE: STORE("LOOP: WHILE\n",-=0);
case PARSE_TYPE_FOR: STORE("LOOP: FOR\n",-=0);
case PARSE_TYPE_DO: STORE("LOOP: DO\n",-=0);
}
tree = tree->next;
}
}
/*
 * Performs a parse operation. This is a macro to prevent bugs: if the
 * calls to lex_token aren't exactly enough to feed to the end of the
 * actual lexemes for the current thing that is being parsed, the state
 * of the next iteration in the creation of the parse tree will be wrong
 * and everything will fail.
 */
/*
 * Body of a `switch` case for a keyword that owns the rest of its line:
 * reads the next token, runs the caller supplied statements C, discards
 * tokens up to and including the next newline, appends a node of kind X
 * and finally leaves the enclosing switch via `break`.
 * Expects `token`, `file` and `parsetree` in the expanding scope.
 *
 * The skip loop also stops once file->length drops below zero, which is
 * the same end-of-input condition the main parse loop tests; without it
 * a final line that lacks a trailing newline would never leave the loop.
 */
#define PARSE_PERFORM(X,C) {                           \
    token = lex_token(file);                           \
    { C }                                              \
    while (token != '\n' && file->length >= 0) {       \
        token = lex_token(file);                       \
    }                                                  \
    PARSE_TREE_ADD(X);                                 \
    break;                                             \
}
/*
 * Releases every node of the list that starts at `tree` and afterwards
 * clears the registered typedefs.  A NULL `tree` is a no-op; note that
 * the typedefs are not cleared in that case either.
 */
void parse_clear(struct parsenode *tree) {
    struct parsenode *following;

    if (tree == NULL)
        return;

    do {
        /* remember the successor before the current node is released */
        following = tree->next;
        mem_d(tree);
        tree = following;
    } while (tree != NULL);

    /* free any potential typedefs */
    typedef_clear();
}
/*
 * Returns a printable, NUL-terminated string for the character `ch`.
 *
 * Space, newline and NUL are mapped to the descriptive names "<space>",
 * "<newline>" and "<null>".  Every other character is returned as a one
 * character string.
 *
 * The returned pointer refers to static storage and must not be freed.
 * Each character value has its own slot, so results for different
 * characters can be used side by side (e.g. twice in one printf call).
 * Previously the address of the by-value parameter was returned, which
 * dangles as soon as the function returns and is not NUL-terminated.
 */
const char *STRING_(char ch) {
    static char   printable[UCHAR_MAX + 1][2];
    unsigned char slot;

    if (ch == ' ')
        return "<space>";
    if (ch == '\n')
        return "<newline>";
    if (ch == '\0')
        return "<null>";

    /* index through unsigned char so negative char values stay in range */
    slot = (unsigned char)ch;
    printable[slot][0] = ch;
    printable[slot][1] = '\0';
    return printable[slot];
}
/*
 * Reads the next token and keeps reading while it is a space, leaving
 * the first non-space token in `token`.  Expects `token` and `file` in
 * the expanding scope.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement:
 * `TOKEN_SKIPWHITE();` is then safe as the unbraced body of an if/else.
 */
#define TOKEN_SKIPWHITE()                \
    do {                                 \
        token = lex_token(file);         \
        while (token == ' ') {           \
            token = lex_token(file);     \
        }                                \
    } while (0)
/*
 * Generates a parse tree out of the lexemes generated by the lexer. This
 * is where the tree is built and where validity checking is performed.
 */
/*
 * Reads tokens from `file` with lex_token() until an ERROR_* token is
 * returned or file->length drops below zero, appending one parse node per
 * recognised construct to a singly linked list.  When the input has been
 * consumed the list is printed with parse_debug(), the lexer is reset
 * with lex_reset() and the list is released with parse_clear().
 *
 * file: lexer state to read from; file->lastok, file->name and
 *       file->line are read for names and diagnostics.
 *
 * Returns 1 once the token loop has finished.  Returns the result of
 * error() when the root node cannot be allocated or when a `#include`
 * directive is not followed by a quoted file name on the same line.
 *
 * Every loop that skips to the end of a line also stops when
 * file->length drops below zero (the end-of-input test of the main loop),
 * so input whose last line has no newline cannot stall the parser.
 */
int parse_tree(struct lex_file *file) {
    struct parsenode *parsetree = NULL;
    struct parsenode *parseroot = NULL;
    int               token     = 0;

    /*
     * Allocate memory for our parse tree:
     * the parse tree is just a singly linked list which will contain
     * all the data for code generation.
     */
    parseroot = mem_a(sizeof(struct parsenode));
    if (!parseroot)
        return error(ERROR_INTERNAL, "Ran out of memory", " ");

    parseroot->type = -1;   /* not a valid type -- root element */
    parseroot->next = NULL; /* the list is walked until NULL, even when nothing is added */
    parsetree       = parseroot;

    while ((token = lex_token(file)) != ERROR_LEX      &&
            token != ERROR_COMPILER &&
            token != ERROR_INTERNAL &&
            token != ERROR_PARSE    &&
            token != ERROR_PREPRO   && file->length >= 0) {
        switch (token) {
            case TOKEN_IF:
                TOKEN_SKIPWHITE();
                if (token != '(')
                    error(ERROR_PARSE, "%s:%d Expected `(` after `if` for if statement\n", file->name, file->line);
                PARSE_TREE_ADD(PARSE_TYPE_IF);
                PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
                break;

            case TOKEN_ELSE:
                token = lex_token(file);
                PARSE_TREE_ADD(PARSE_TYPE_ELSE);
                break;

            case TOKEN_FOR:
                /*
                 * NOTE(review): `token` still holds TOKEN_FOR here, so this
                 * loop only runs if that value equals ' ' or '\n'.
                 */
                while ((token == ' ' || token == '\n') && file->length >= 0)
                    token = lex_token(file);
                PARSE_TREE_ADD(PARSE_TYPE_FOR);
                break;

            /*
             * This is a quick and easy way to do typedefs at parse time
             * all power is in typedef_add(), in typedef.c.  We handle
             * the tokens accordingly here.
             */
            case TOKEN_TYPEDEF: {
                char *from;
                char *to;

                /* every second token is skipped; the others name the two types */
                token = lex_token(file);
                token = lex_token(file);
                from  = util_strdup(file->lastok);
                token = lex_token(file);
                token = lex_token(file);
                to    = util_strdup(file->lastok);

                typedef_add(from, to);
                mem_d(from);
                mem_d(to);

                /* drop whatever is left on the line */
                while (token != '\n' && file->length >= 0)
                    token = lex_token(file);
                break;
            }

            /*
             * Returns are addable as-is, statement checking is during
             * the actual parse tree check.
             */
            case TOKEN_RETURN:
                token = lex_token(file);
                PARSE_TREE_ADD(PARSE_TYPE_RETURN);
                break;

            case TOKEN_CONTINUE:
                PARSE_TREE_ADD(PARSE_TYPE_CONTINUE);
                break;

            case TOKEN_DO:    PARSE_PERFORM(PARSE_TYPE_DO,    {});
            case TOKEN_WHILE: PARSE_PERFORM(PARSE_TYPE_WHILE, {});
            case TOKEN_BREAK: PARSE_PERFORM(PARSE_TYPE_BREAK, {});
            case TOKEN_GOTO:  PARSE_PERFORM(PARSE_TYPE_GOTO,  {});
            case TOKEN_VOID:  PARSE_PERFORM(PARSE_TYPE_VOID,  {});

            /*
             * All remaining type keywords share the declaration handling
             * below.  Exactly one type node is added per keyword; letting
             * the cases fall through one another used to add a node for
             * every type listed after the matching one as well.
             */
            case TOKEN_STRING:
            case TOKEN_VECTOR:
            case TOKEN_ENTITY:
            case TOKEN_FLOAT: {
                char *name = NULL;

                if (token == TOKEN_STRING) {
                    PARSE_TREE_ADD(PARSE_TYPE_STRING);
                } else if (token == TOKEN_VECTOR) {
                    PARSE_TREE_ADD(PARSE_TYPE_VECTOR);
                } else if (token == TOKEN_ENTITY) {
                    PARSE_TREE_ADD(PARSE_TYPE_ENTITY);
                } else {
                    PARSE_TREE_ADD(PARSE_TYPE_FLOAT);
                }

                TOKEN_SKIPWHITE();
                name = util_strdup(file->lastok);

                /* is it NOT a definition? */
                if (token != ';') {
                    while (token == ' ')
                        token = lex_token(file);

                    /* it's a function? */
                    if (token == '(') {
                        /*
                         * Now I essentially have to do a ton of parsing for
                         * function definition.
                         */
                        PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
                        token = lex_token(file);
                        while (token != '\n' && token != ')' && file->length >= 0) {
                            switch (token) {
                                case TOKEN_VOID:   PARSE_TREE_ADD(PARSE_TYPE_VOID);   break;
                                case TOKEN_STRING: PARSE_TREE_ADD(PARSE_TYPE_STRING); break;
                                case TOKEN_VECTOR: PARSE_TREE_ADD(PARSE_TYPE_VECTOR); break;
                                case TOKEN_ENTITY: PARSE_TREE_ADD(PARSE_TYPE_ENTITY); break;
                                case TOKEN_FLOAT:  PARSE_TREE_ADD(PARSE_TYPE_FLOAT);  break;
                                /*
                                 * TODO: Need to parse function pointers: I have no clue how
                                 * I'm actually going to pull that off, it's going to be hard
                                 * since you can have a function pointer-pointer-pointer ....
                                 */
                                default:
                                    break;
                            }
                            /*
                             * Advance to the next token of the parameter list;
                             * without this the loop would see the same token
                             * forever and never terminate.
                             */
                            token = lex_token(file);
                        }

                        /* just a definition */
                        if (token == ')') {
                            /*
                             * I like to put my { on the same line as the ) for
                             * functions, ifs, elses, so we must support that!.
                             */
                            PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
                            token = lex_token(file);
                            token = lex_token(file);
                            if (token == '{')
                                PARSE_TREE_ADD(PARSE_TYPE_LBS);
                        } else if (token == '\n') {
                            error(ERROR_COMPILER, "%s:%d Expecting `;` after function definition %s\n", file->name, file->line, name);
                        }
                    } else if (token == '=') {
                        PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
                    } else {
                        error(ERROR_COMPILER, "%s:%d Invalid decltype: expected `(` [function], or `=` [constant] for %s\n", file->name, file->line, name);
                    }
                } else {
                    /* definition */
                    printf("FOUND DEFINITION\n");
                }

                mem_d(name);
                /*
                 * Stop here: running on into the `#` handler below would
                 * read another token and throw away input up to the next
                 * newline as if it belonged to a preprocessor directive.
                 */
                break;
            }

            /*
             * From here down is all language punctuation: There is no
             * need to actually create tokens from these because they're
             * already tokenized as these individual tokens (which are in a
             * special area of the ascii table which doesn't conflict with
             * our other tokens which are higher than the ascii table.)
             */
            case '#':
                token = lex_token(file); /* skip '#' */
                /*
                 * TODO: report an error when `#` is followed only by
                 * whitespace up to the end of the line.
                 *
                 * If we make it here we found a directive, the supported
                 * directives so far are #include.
                 */
                if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
                    /*
                     * We only support include " ", not <> like in C (why?)
                     * because the latter is silly.
                     */
                    while (*file->lastok != '"' && token != '\n' && file->length >= 0)
                        token = lex_token(file);

                    /* we handle lexing at that point now */
                    if (token == '\n') {
                        /* release the nodes built so far before bailing out */
                        parse_clear(parseroot);
                        return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line);
                    }
                }

                /* skip all tokens to end of directive */
                while (token != '\n' && file->length >= 0)
                    token = lex_token(file);
                break;

            case '.':
                PARSE_TREE_ADD(PARSE_TYPE_DOT);
                break;
            case '(':
                PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
                break;
            case ')':
                PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
                break;

            /*
             * One or two character operators: the token after the first
             * character decides which node is added.
             * NOTE(review): when it is not the second operator character
             * it is still consumed here and never looked at again.
             */
            case '&': /* & */
                token = lex_token(file);
                if (token == '&') { /* && */
                    token = lex_token(file);
                    PARSE_TREE_ADD(PARSE_TYPE_LAND);
                    break;
                }
                PARSE_TREE_ADD(PARSE_TYPE_BAND);
                break;
            case '|': /* | */
                token = lex_token(file);
                if (token == '|') { /* || */
                    token = lex_token(file);
                    PARSE_TREE_ADD(PARSE_TYPE_LOR);
                    break;
                }
                PARSE_TREE_ADD(PARSE_TYPE_BOR);
                break;
            case '!': /* ! */
                token = lex_token(file);
                if (token == '=') { /* != */
                    token = lex_token(file);
                    PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
                    break;
                }
                PARSE_TREE_ADD(PARSE_TYPE_LNOT);
                break;
            case '<': /* < */
                token = lex_token(file);
                if (token == '=') { /* <= */
                    token = lex_token(file);
                    PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
                    break;
                }
                PARSE_TREE_ADD(PARSE_TYPE_LT);
                break;
            case '>': /* > */
                token = lex_token(file);
                if (token == '=') { /* >= */
                    token = lex_token(file);
                    PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
                    break;
                }
                PARSE_TREE_ADD(PARSE_TYPE_GT);
                break;
            case '=': /* = */
                token = lex_token(file);
                if (token == '=') { /* == */
                    token = lex_token(file);
                    PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
                    break;
                }
                PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
                break;

            /* single character tokens; each also consumes the token that follows */
            case ';':
                token = lex_token(file);
                PARSE_TREE_ADD(PARSE_TYPE_DONE);
                break;
            case '-':
                token = lex_token(file);
                PARSE_TREE_ADD(PARSE_TYPE_MINUS);
                break;
            case '+':
                token = lex_token(file);
                PARSE_TREE_ADD(PARSE_TYPE_ADD);
                break;
            case '{':
                token = lex_token(file);
                PARSE_TREE_ADD(PARSE_TYPE_LBS);
                break;
            case '}':
                token = lex_token(file);
                PARSE_TREE_ADD(PARSE_TYPE_RBS);
                break;

            /*
             * TODO: Fix lexer to spit out ( ) as tokens, it seems the
             * using '(' or ')' in parser doesn't work properly unless
             * there are spaces before them to allow the lexer to properly
             * separate identifiers. -- otherwise it eats all of it.
             */
            case LEX_IDENT:
                token = lex_token(file);
                PARSE_TREE_ADD(PARSE_TYPE_IDENT);
                break;

            /* every other token is ignored */
            default:
                break;
        }
    }

    parse_debug(parseroot);
    lex_reset(file);
    parse_clear(parseroot);
    return 1;
}