This commit is contained in:
Dale Weiler 2012-04-10 04:20:15 -04:00
parent 4c23a669aa
commit bd5ba9e0fe
5 changed files with 59 additions and 164 deletions

12
gmqcc.h
View file

@ -156,7 +156,8 @@ struct lex_file {
#define TOKEN_FOR 8 // extension
#define TOKEN_TYPEDEF 9 // extension
// ensure the token types are out of the
// bounds of any others that may conflict.
#define TOKEN_FLOAT 110
#define TOKEN_VECTOR 111
#define TOKEN_STRING 112
@ -166,16 +167,17 @@ struct lex_file {
/*
* Lexer state constants, these are numbers for where exactly in
* the lexing the lexer is at. Or where it decided to stop if a lexer
* error occurs.
* error occurs. These numbers must be greater than the end of the
* ASCII table and greater than the last type token, TOKEN_VOID.
*/
#define LEX_COMMENT 1128 /* higher than ascii */
#define LEX_COMMENT 1128
#define LEX_CHRLIT 1129
#define LEX_STRLIT 1130
#define LEX_IDENT 1131
int lex_token(struct lex_file *);
void lex_reset(struct lex_file *);
int lex_close(struct lex_file *);
void lex_close(struct lex_file *);
struct lex_file *lex_open (FILE *);
/* errors */
@ -187,7 +189,7 @@ struct lex_file *lex_open (FILE *);
int error(int, const char *, ...);
/* parse.c */
int parse(struct lex_file *);
int parse_tree(struct lex_file *);
struct parsenode {
struct parsenode *next;
int type; /* some token */

12
lex.c
View file

@ -52,13 +52,11 @@ struct lex_file *lex_open(FILE *fp) {
return lex;
}
int lex_close(struct lex_file *file) {
int ret = -1;
if (file) {
ret = fclose(file->file);
mem_d(file);
}
return ret;
/*
 * Tears down a lexer context: closes the stream stored in file->file
 * and then releases the lex_file itself with mem_d. A NULL argument
 * is accepted and ignored.
 *
 * NOTE(review): the stream may already have been closed elsewhere;
 * calling fclose twice on the same stream is undefined behavior, so
 * confirm that no caller closes it before this point.
 */
void lex_close(struct lex_file *file) {
    if (file) {
        fclose(file->file); /* may already be closed */
        mem_d(file);
    }
}
static void lex_addch(int ch, struct lex_file *file) {

4
main.c
View file

@ -65,8 +65,8 @@ int main(int argc, char **argv) {
return error(ERROR_COMPILER, "Source file: %s not found\n", ifile);
} else {
struct lex_file *lex = lex_open(fp);
parse (lex);
lex_close(lex);
parse_tree(lex); /* generate parse tree */
lex_close (lex); /* cleanup lexer */
}
return 0;
}

68
parse.c
View file

@ -151,13 +151,15 @@ void parse_debug(struct parsenode *tree) {
}
/*
* This just skips the token and throws it in the parse tree for later
* checking / optimization / codegen, it doesn't do anything with it
* like syntax check for legal use -- like it should as it's a TODO item
* which is not implemented
* Performs a parse operation. This is a macro to prevent bugs: if the
* calls to lex_token aren't exactly enough to consume every lexeme of
* the construct currently being parsed, the lexer state for the next
* iteration of parse-tree construction will be wrong and everything
* will fail.
*/
#define PARSE_TODO(X) { \
#define PARSE_PERFORM(X,C) { \
token = lex_token(file); \
{ C } \
while (token != '\n') { \
token = lex_token(file); \
} \
@ -178,7 +180,11 @@ void parse_clear(struct parsenode *tree) {
typedef_clear();
}
int parse(struct lex_file *file) {
/*
* Generates a parse tree out of the lexemes generated by the lexer. This
* is where the tree is built and where validity checking is performed.
*/
int parse_tree(struct lex_file *file) {
struct parsenode *parsetree = NULL;
struct parsenode *parseroot = NULL;
@ -203,29 +209,19 @@ int parse(struct lex_file *file) {
token != ERROR_PREPRO && file->length >= 0) {
switch (token) {
case TOKEN_IF:
//token = lex_token(file);
while ((token == ' ' || token == '\n') && file->length >= 0)
token = lex_token(file);
//if (token != '(')
// error(ERROR_PARSE, "Expected `(` after if\n", "");
PARSE_TREE_ADD(PARSE_TYPE_IF);
break;
case TOKEN_ELSE:
token = lex_token(file);
//while ((token == ' ' || token == '\n') && file->length >= 0)
// token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_ELSE);
break;
case TOKEN_FOR:
token = lex_token(file);
//token = lex_token(file);
while ((token == ' ' || token == '\n') && file->length >= 0)
token = lex_token(file);
//PARSE_TREE_ADD(PARSE_TYPE_FOR);
PARSE_TODO(PARSE_TYPE_FOR);
PARSE_TREE_ADD(PARSE_TYPE_FOR);
break;
/*
@ -243,9 +239,6 @@ int parse(struct lex_file *file) {
typedef_add(f, t);
/* free strdup strings */
//mem_d(f);
//mem_d(t);
free(f);
free(t);
@ -253,19 +246,27 @@ int parse(struct lex_file *file) {
token = lex_token(file);
break;
}
/*
* Returns are addable as-is, statement checking is during
* the actual parse tree check.
*/
case TOKEN_RETURN:
PARSE_TREE_ADD(PARSE_TYPE_RETURN);
break;
//PARSE_PERFORM(PARSE_TYPE_RETURN, {});
case TOKEN_DO: PARSE_TODO(PARSE_TYPE_DO);
case TOKEN_WHILE: PARSE_TODO(PARSE_TYPE_WHILE);
case TOKEN_BREAK: PARSE_TODO(PARSE_TYPE_BREAK);
case TOKEN_CONTINUE: PARSE_TODO(PARSE_TYPE_CONTINUE);
case TOKEN_RETURN: PARSE_TODO(PARSE_TYPE_RETURN);
case TOKEN_GOTO: PARSE_TODO(PARSE_TYPE_GOTO);
case TOKEN_VOID: PARSE_TODO(PARSE_TYPE_VOID);
case TOKEN_STRING: PARSE_TODO(PARSE_TYPE_STRING);
case TOKEN_FLOAT: PARSE_TODO(PARSE_TYPE_FLOAT);
case TOKEN_VECTOR: PARSE_TODO(PARSE_TYPE_VECTOR);
case TOKEN_ENTITY: PARSE_TODO(PARSE_TYPE_ENTITY);
case TOKEN_DO: PARSE_PERFORM(PARSE_TYPE_DO, {});
case TOKEN_WHILE: PARSE_PERFORM(PARSE_TYPE_WHILE, {});
case TOKEN_BREAK: PARSE_PERFORM(PARSE_TYPE_BREAK, {});
case TOKEN_CONTINUE: PARSE_PERFORM(PARSE_TYPE_CONTINUE,{});
case TOKEN_GOTO: PARSE_PERFORM(PARSE_TYPE_GOTO, {});
case TOKEN_VOID: PARSE_PERFORM(PARSE_TYPE_VOID, {});
case TOKEN_STRING: PARSE_PERFORM(PARSE_TYPE_STRING, {});
case TOKEN_FLOAT: PARSE_PERFORM(PARSE_TYPE_FLOAT, {});
case TOKEN_VECTOR: PARSE_PERFORM(PARSE_TYPE_VECTOR, {});
case TOKEN_ENTITY: PARSE_PERFORM(PARSE_TYPE_ENTITY, {});
/*
* From here down is all language punctuation: There is no
@ -392,6 +393,5 @@ int parse(struct lex_file *file) {
parse_debug(parseroot);
lex_reset(file);
parse_clear(parseroot);
return 1;
}

127
typedef.c
View file

@ -24,12 +24,6 @@
#include <stdint.h> /* replace if stdint.h doesn't exist! */
#include <limits.h>
#include "gmqcc.h"
/*
* This implements a hashtable for typedef type keywords which end up
* being translated to their full-expressed type. This uses a singly
* linked list with a fast hash function.
*/
static typedef_node *typedef_table[1024];
void typedef_init() {
@ -38,118 +32,19 @@ void typedef_init() {
typedef_table[i] = NULL;
}
/*
* Fast collisionless hashfunction based off of:
* http://www.azillionmonkeys.com/qed/hash.html
* By: Paul Hsieh
*
* The code is licensed under LGPL 2.1 or Paul
* Hsieh's derivative license. Stated on his page
* quote:
*
* The LGPL 2.1 is not necessarily a more liberal license than my
* derivative license, but this additional licensing makes the code
* available to more developers. Note that this does not give you
* multi-licensing rights. You can only use the code under one of
* the licenses at a time.
*
* Paul Hsieh derivative license
*
* The derivative content includes raw computer source code, ideas,
* opinions, and excerpts whose original source is covered under
* another license and transformations of such derivatives.
* Note that mere excerpts by themselves (with the exception of raw
* source code) are not considered derivative works under this license.
* Use and redistribution is limited to the following conditions:
*
* One may not create a derivative work which, in any way, violates the
* Paul Hsieh exposition license described above on the original content.
*
* One may not apply a license to a derivative work that precludes anyone
* else from using and redistributing derivative content.
*
* One may not attribute any derivative content to authors not involved
* in the creation of the content, though an attribution to the author
* is not necessary.
*
* Paul Hsieh exposition license
*
* The content of all text, figures, tables and displayed layout is
* copyrighted by its author and owner Paul Hsieh unless specifically
* denoted otherwise. Redistribution is limited to the following conditions:
*
* The redistributor must fully attribute the content's authorship and
* make a good faith effort to cite the original location of the original
* content.
*
* The content may not be modified via excerpt or otherwise with the
* exception of additional citations such as described above without prior
* consent of Paul Hsieh.
*
* The content may not be subject to a change in license without prior
* consent of Paul Hsieh.
*
* The content may be used for commercial purposes.
*/
#if (defined(__GNUC__) && defined(__i386__)) || defined(_MSC_VER)
/*
* Unaligned loads are faster if we can do them, otherwise fall back
* to safer version below.
*/
# define load16(D) (*((const uint16_t*)(D)))
#else
# define load16(D) ((((uint32_t)(((const uint8_t*)(D))[1])) << 8) + \
(uint32_t)(((const uint8_t*)(D))[0]))
#endif
unsigned int inline typedef_hash(const char *data) {
uint32_t hash = strlen(data);
uint32_t size = hash;
uint32_t temp = 0;
unsigned int typedef_hash(const char *s) {
unsigned int hash = 0;
unsigned int size = strlen(s);
unsigned int iter;
int last;
if (size <= 0|| data == NULL)
return -1;
last = size & 3;
size >>= 2;
/* main loop */
for (;size > 0; size--) {
hash += (load16(data));
temp = (load16(data+2) << 11) ^ hash;
hash = (hash << 16) ^ temp;
data += sizeof(uint16_t) << 1;
hash += hash >> 11;
for (iter = 0; iter < size; iter++) {
hash += s[iter];
hash += (hash << 10);
hash ^= (hash >> 6);
}
/* ends */
switch (last) {
case 3:
hash += load16(data);
hash ^= hash << 16;
hash ^= ((signed char)data[sizeof(uint16_t)]) << 8;
hash += hash >> 11;
break;
case 2:
hash += load16(data);
hash ^= hash << 11;
hash += hash >> 17;
break;
case 1:
hash += (signed char)*data;
hash ^= hash << 10;
hash += hash >> 1;
break;
}
/* force avalanching of final 127 bits */
hash ^= hash << 3;
hash += hash >> 5;
hash ^= hash << 4;
hash += hash >> 17;
hash ^= hash << 25;
hash += hash >> 6;
hash += (hash << 3);
hash ^= (hash >> 11);
hash += (hash << 15);
return hash % 1024;
}