/* * Copyright (C) 2012 * Dale Weiler * * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include "gmqcc.h" /* * These are not lexical tokens: These are parse tree types. Most people * perform tokenizing on language punctuation which is wrong. That stuff * is technically already tokenized, it just needs to be parsed into a tree */ #define PARSE_TYPE_DO 0 #define PARSE_TYPE_ELSE 1 #define PARSE_TYPE_IF 2 #define PARSE_TYPE_WHILE 3 #define PARSE_TYPE_BREAK 4 #define PARSE_TYPE_CONTINUE 5 #define PARSE_TYPE_RETURN 6 #define PARSE_TYPE_GOTO 7 #define PARSE_TYPE_FOR 8 #define PARSE_TYPE_VOID 9 #define PARSE_TYPE_STRING 10 #define PARSE_TYPE_FLOAT 11 #define PARSE_TYPE_VECTOR 12 #define PARSE_TYPE_ENTITY 13 #define PARSE_TYPE_LAND 14 #define PARSE_TYPE_LOR 15 #define PARSE_TYPE_LTEQ 16 #define PARSE_TYPE_GTEQ 17 #define PARSE_TYPE_EQEQ 18 #define PARSE_TYPE_LNEQ 19 #define PARSE_TYPE_COMMA 20 #define PARSE_TYPE_LNOT 21 #define PARSE_TYPE_STAR 22 #define PARSE_TYPE_DIVIDE 23 #define PARSE_TYPE_LPARTH 24 #define PARSE_TYPE_RPARTH 25 #define PARSE_TYPE_MINUS 26 #define PARSE_TYPE_ADD 27 #define PARSE_TYPE_EQUAL 28 #define PARSE_TYPE_LBS 29 #define PARSE_TYPE_RBS 30 #define PARSE_TYPE_ELIP 31 #define PARSE_TYPE_DOT 32 #define PARSE_TYPE_LT 33 #define PARSE_TYPE_GT 34 #define PARSE_TYPE_BAND 35 #define PARSE_TYPE_BOR 36 #define PARSE_TYPE_DONE 37 #define PARSE_TYPE_IDENT 38 /* * Adds a parse type to the parse tree, this is where all the hard * work actually begins. */ #define PARSE_TREE_ADD(X) \ do { \ parsetree->next = mem_a(sizeof(struct parsenode)); \ parsetree->next->next = NULL; \ parsetree->next->type = (X); \ parsetree = parsetree->next; \ } while (0) /* * This is all the punctuation handled in the parser, these don't * need tokens, they're already tokens. */ #if 0 "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*", "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...", "." , "<" , ">" , "&" , "|" , #endif #define STORE(X,C) { \ long f = fill; \ while(f--) { \ putchar(' '); \ } \ fill C; \ printf(X); \ break; \ } void parse_debug(struct parsenode *tree) { long fill = 0; while (tree) { switch (tree->type) { case PARSE_TYPE_ADD: STORE("OPERATOR: ADD \n", -=0); case PARSE_TYPE_BAND: STORE("OPERATOR: BITAND \n",-=0); case PARSE_TYPE_BOR: STORE("OPERATOR: BITOR \n",-=0); case PARSE_TYPE_COMMA: STORE("OPERATOR: SEPERATOR\n",-=0); case PARSE_TYPE_DOT: STORE("OPERATOR: DOT\n",-=0); case PARSE_TYPE_DIVIDE: STORE("OPERATOR: DIVIDE\n",-=0); case PARSE_TYPE_EQUAL: STORE("OPERATOR: ASSIGNMENT\n",-=0); case PARSE_TYPE_BREAK: STORE("STATEMENT: BREAK \n",-=0); case PARSE_TYPE_CONTINUE: STORE("STATEMENT: CONTINUE\n",-=0); case PARSE_TYPE_GOTO: STORE("STATEMENT: GOTO\n",-=0); case PARSE_TYPE_RETURN: STORE("STATEMENT: RETURN\n",-=0); case PARSE_TYPE_DONE: STORE("STATEMENT: DONE\n",-=0); case PARSE_TYPE_VOID: STORE("DECLTYPE: VOID\n",-=0); case PARSE_TYPE_STRING: STORE("DECLTYPE: STRING\n",-=0); case PARSE_TYPE_ELIP: STORE("DECLTYPE: VALIST\n",-=0); case PARSE_TYPE_ENTITY: STORE("DECLTYPE: ENTITY\n",-=0); case PARSE_TYPE_FLOAT: STORE("DECLTYPE: FLOAT\n",-=0); case PARSE_TYPE_VECTOR: STORE("DECLTYPE: VECTOR\n",-=0); case PARSE_TYPE_GT: STORE("TEST: GREATER THAN\n",-=0); case PARSE_TYPE_LT: STORE("TEST: LESS THAN\n",-=0); case PARSE_TYPE_GTEQ: STORE("TEST: GREATER THAN OR EQUAL\n",-=0); case PARSE_TYPE_LTEQ: STORE("TEST: LESS THAN OR EQUAL\n",-=0); case PARSE_TYPE_LNEQ: STORE("TEST: NOT EQUAL\n",-=0); case PARSE_TYPE_EQEQ: STORE("TEST: EQUAL-EQUAL\n",-=0); case PARSE_TYPE_LBS: STORE("BLOCK: BEG\n",+=4); case PARSE_TYPE_RBS: STORE("BLOCK: END\n",-=4); case PARSE_TYPE_ELSE: STORE("BLOCK: ELSE\n",+=0); case PARSE_TYPE_IF: STORE("BLOCK: IF\n",+=0); case PARSE_TYPE_LAND: STORE("LOGICAL: AND\n",-=0); case PARSE_TYPE_LNOT: STORE("LOGICAL: NOT\n",-=0); case PARSE_TYPE_LOR: STORE("LOGICAL: OR\n",-=0); case PARSE_TYPE_LPARTH: STORE("PARTH: BEG\n",-=0); case PARSE_TYPE_RPARTH: STORE("PARTH: END\n",-=0); case PARSE_TYPE_WHILE: STORE("LOOP: WHILE\n",-=0); case PARSE_TYPE_FOR: STORE("LOOP: FOR\n",-=0); case PARSE_TYPE_DO: STORE("LOOP: DO\n",-=0); } tree = tree->next; } } /* * Performs a parse operation: This is a macro to prevent bugs, if the * calls to lex_token are'nt exactly enough to feed to the end of the * actual lexees for the current thing that is being parsed, the state * of the next iteration in the creation of the parse tree will be wrong * and everything will fail. */ #define PARSE_PERFORM(X,C) { \ token = lex_token(file); \ { C } \ while (token != '\n') { \ token = lex_token(file); \ } \ PARSE_TREE_ADD(X); \ break; \ } void parse_clear(struct parsenode *tree) { if (!tree) return; struct parsenode *temp = NULL; while (tree != NULL) { temp = tree; tree = tree->next; mem_d (temp); } /* free any potential typedefs */ typedef_clear(); } const char *STRING_(char ch) { if (ch == ' ') return ""; if (ch == '\n') return ""; if (ch == '\0') return ""; return &ch; } #define TOKEN_SKIPWHITE() \ token = lex_token(file); \ while (token == ' ') { \ token = lex_token(file); \ } /* * Generates a parse tree out of the lexees generated by the lexer. This * is where the tree is built. This is where valid check is performed. */ int parse_tree(struct lex_file *file) { struct parsenode *parsetree = NULL; struct parsenode *parseroot = NULL; /* * Allocate memory for our parse tree: * the parse tree is just a singly linked list which will contain * all the data for code generation. */ if (!parseroot) { parseroot = mem_a(sizeof(struct parsenode)); if (!parseroot) return error(ERROR_INTERNAL, "Ran out of memory", " "); parsetree = parseroot; parsetree->type = -1; /* not a valid type -- root element */ } int token = 0; while ((token = lex_token(file)) != ERROR_LEX && \ token != ERROR_COMPILER && \ token != ERROR_INTERNAL && \ token != ERROR_PARSE && \ token != ERROR_PREPRO && file->length >= 0) { switch (token) { case TOKEN_IF: TOKEN_SKIPWHITE(); if (token != '(') error(ERROR_PARSE, "%s:%d Expected `(` after `if` for if statement\n", file->name, file->line); PARSE_TREE_ADD(PARSE_TYPE_IF); PARSE_TREE_ADD(PARSE_TYPE_LPARTH); break; case TOKEN_ELSE: token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_ELSE); break; case TOKEN_FOR: while ((token == ' ' || token == '\n') && file->length >= 0) token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_FOR); break; /* * This is a quick and easy way to do typedefs at parse time * all power is in typedef_add(), in typedef.c. We handle * the tokens accordingly here. */ case TOKEN_TYPEDEF: { char *f,*t; token = lex_token(file); token = lex_token(file); f = util_strdup(file->lastok); token = lex_token(file); token = lex_token(file); t = util_strdup(file->lastok); typedef_add(f, t); mem_d(f); mem_d(t); while (token != '\n') token = lex_token(file); break; } /* * Returns are addable as-is, statement checking is during * the actual parse tree check. */ case TOKEN_RETURN: token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_RETURN); break; case TOKEN_CONTINUE: PARSE_TREE_ADD(PARSE_TYPE_CONTINUE); break; case TOKEN_DO: PARSE_PERFORM(PARSE_TYPE_DO, {}); case TOKEN_WHILE: PARSE_PERFORM(PARSE_TYPE_WHILE, {}); case TOKEN_BREAK: PARSE_PERFORM(PARSE_TYPE_BREAK, {}); case TOKEN_GOTO: PARSE_PERFORM(PARSE_TYPE_GOTO, {}); case TOKEN_VOID: PARSE_PERFORM(PARSE_TYPE_VOID, {}); case TOKEN_STRING: PARSE_TREE_ADD(PARSE_TYPE_STRING); case TOKEN_VECTOR: PARSE_TREE_ADD(PARSE_TYPE_VECTOR); case TOKEN_ENTITY: PARSE_TREE_ADD(PARSE_TYPE_ENTITY); case TOKEN_FLOAT: PARSE_TREE_ADD(PARSE_TYPE_FLOAT); /* fall into this for all types */ { char *name = NULL; TOKEN_SKIPWHITE(); name = util_strdup(file->lastok); //token = lex_token (file); /* is it NOT a definition? */ if (token != ';') { while (token == ' ') token = lex_token(file); /* it's a function? */ if (token == '(') { /* * Now I essentially have to do a ton of parsing for * function definition. */ PARSE_TREE_ADD(PARSE_TYPE_LPARTH); token = lex_token(file); while (token != '\n' && token != ')') { switch (token) { case TOKEN_VOID: PARSE_TREE_ADD(PARSE_TYPE_VOID); break; case TOKEN_STRING: PARSE_TREE_ADD(PARSE_TYPE_STRING); break; case TOKEN_ENTITY: PARSE_TREE_ADD(PARSE_TYPE_ENTITY); break; case TOKEN_FLOAT: PARSE_TREE_ADD(PARSE_TYPE_FLOAT); break; /* * TODO: Need to parse function pointers: I have no clue how * I'm actually going to pull that off, it's going to be hard * since you can have a function pointer-pointer-pointer .... */ } } /* just a definition */ if (token == ')') { /* * I like to put my { on the same line as the ) for * functions, ifs, elses, so we must support that!. */ PARSE_TREE_ADD(PARSE_TYPE_RPARTH); token = lex_token(file); token = lex_token(file); if(token == '{') PARSE_TREE_ADD(PARSE_TYPE_LBS); } else if (token == '\n') error(ERROR_COMPILER, "%s:%d Expecting `;` after function definition %s\n", file->name, file->line, name); } else if (token == '=') { PARSE_TREE_ADD(PARSE_TYPE_EQUAL); } else { error(ERROR_COMPILER, "%s:%d Invalid decltype: expected `(` [function], or `=` [constant] for %s\n", file->name, file->line, name); } } else { /* definition */ printf("FOUND DEFINITION\n"); } mem_d(name); } /* * From here down is all language punctuation: There is no * need to actual create tokens from these because they're already * tokenized as these individual tokens (which are in a special area * of the ascii table which doesn't conflict with our other tokens * which are higer than the ascii table.) */ case '#': token = lex_token(file); /* skip '#' */ //while (isspace(token)) { // if (token == '\n') // return error(ERROR_PARSE, "Expected valid preprocessor directive after `#` %s\n"); // token = lex_token(file); /* try again */ //} /* * If we make it here we found a directive, the supported * directives so far are #include. */ if (strncmp(file->lastok, "include", sizeof("include")) == 0) { /* * We only suport include " ", not <> like in C (why?) * because the latter is silly. */ while (*file->lastok != '"' && token != '\n') token = lex_token(file); /* we handle lexing at that point now */ if (token == '\n') return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line); } /* skip all tokens to end of directive */ while (token != '\n') token = lex_token(file); break; case '.': PARSE_TREE_ADD(PARSE_TYPE_DOT); break; case '(': PARSE_TREE_ADD(PARSE_TYPE_LPARTH); break; case ')': PARSE_TREE_ADD(PARSE_TYPE_RPARTH); break; case '&': /* & */ token = lex_token(file); if (token == '&') { /* && */ token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_LAND); break; } PARSE_TREE_ADD(PARSE_TYPE_BAND); break; case '|': /* | */ token = lex_token(file); if (token == '|') { /* || */ token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_LOR); break; } PARSE_TREE_ADD(PARSE_TYPE_BOR); break; case '!': /* ! */ token = lex_token(file); if (token == '=') { /* != */ token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_LNEQ); break; } PARSE_TREE_ADD(PARSE_TYPE_LNOT); break; case '<': /* < */ token = lex_token(file); if (token == '=') { /* <= */ token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_LTEQ); break; } PARSE_TREE_ADD(PARSE_TYPE_LT); break; case '>': /* > */ token = lex_token(file); if (token == '=') { /* >= */ token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_GTEQ); break; } PARSE_TREE_ADD(PARSE_TYPE_GT); break; case '=': /* = */ token = lex_token(file); if (token == '=') { /* == */ token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_EQEQ); break; } PARSE_TREE_ADD(PARSE_TYPE_EQUAL); break; case ';': token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_DONE); break; case '-': token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_MINUS); break; case '+': token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_ADD); break; case '{': token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_LBS); break; case '}': token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_RBS); break; /* * TODO: Fix lexer to spit out ( ) as tokens, it seems the * using '(' or ')' in parser doesn't work properly unless * there are spaces before them to allow the lexer to properly * seperate identifiers. -- otherwise it eats all of it. */ case LEX_IDENT: token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_IDENT); break; } } parse_debug(parseroot); lex_reset(file); parse_clear(parseroot); return 1; }