mirror of
https://github.com/DarkPlacesEngine/gmqcc.git
synced 2024-12-11 21:11:09 +00:00
620 lines
24 KiB
C
620 lines
24 KiB
C
/*
|
|
* Copyright (C) 2012
|
|
* Dale Weiler
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
* this software and associated documentation files (the "Software"), to deal in
|
|
* the Software without restriction, including without limitation the rights to
|
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
* of the Software, and to permit persons to whom the Software is furnished to do
|
|
* so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in all
|
|
* copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
#include "gmqcc.h"
|
|
/*
 * Parser states used while assembling a file:
 *   ASM_NULL     -- outside of any function body (the initial state, and
 *                   the state restored once a function's END is reached)
 *   ASM_FUNCTION -- inside a function body, accepting statements
 */
typedef enum {
    ASM_NULL     = 0,
    ASM_FUNCTION = 1
} asm_state;
|
|
|
|
typedef struct {
|
|
char *name;
|
|
char type; /* type, float, vector, string, function*/
|
|
char elem; /* 0=x, 1=y, or 2=Z? */
|
|
int offset; /* location in globals */
|
|
bool isconst;
|
|
} asm_sym;
|
|
VECTOR_MAKE(asm_sym, asm_symbols);
|
|
|
|
/*
 * Assembly text processing: fetches the next line of input so that it
 * can be parsed and assembled.
 *
 * On success the line handed back by util_getline is returned and *byte
 * receives util_getline's result; the caller releases the line with
 * mem_d.  When util_getline reports -1 the buffer is released and NULL
 * is returned (*byte then holds that -1 converted to size_t).
 */
static char* asm_getline(size_t *byte, FILE *fp) {
    char  *text   = NULL;
    size_t result = util_getline(&text, byte, fp);

    *byte = result;
    if (result != (size_t)-1)
        return text;

    mem_d(text);
    return NULL;
}
|
|
|
|
/*
 * Entire external interface for main.c - used to perform the actual
 * assembly of assembly source files.
 */
|
|
/*
 * Opens the assembly source `file` for reading, stores the stream in *fp
 * and then calls code_init().
 *
 * If the file cannot be opened, *fp is set to NULL and the reason is
 * reported on stderr; code_init() is still called so the remaining
 * start-up sequence is unchanged.  Callers should check *fp before
 * reading from it.
 */
void asm_init(const char *file, FILE **fp) {
    *fp = fopen(file, "r");
    if (!*fp)
        perror(file);

    code_init();
}
|
|
/*
 * Closes the assembly input stream and writes the assembled program out
 * to "program.dat" through code_write().
 *
 * fp may be NULL (for example when the input file could not be opened);
 * passing NULL to fclose is undefined behaviour, so it is skipped.
 */
void asm_close(FILE *fp) {
    if (fp)
        fclose(fp);

    code_write("program.dat");
}
|
|
void asm_clear() {
|
|
size_t i = 0;
|
|
for (; i < asm_symbols_elements; i++)
|
|
mem_d(asm_symbols_data[i].name);
|
|
mem_d(asm_symbols_data);
|
|
}
|
|
|
|
/*
|
|
* Dumps all values of all constants and assembly related
|
|
* information obtained during the assembly procedure.
|
|
*/
|
|
void asm_dumps() {
|
|
size_t i = 0;
|
|
for (; i < asm_symbols_elements; i++) {
|
|
asm_sym *g = &asm_symbols_data[i];
|
|
if (!g->isconst) continue;
|
|
switch (g->type) {
|
|
case TYPE_VECTOR: {
|
|
util_debug("ASM", "vector %s %c[%f]\n", g->name,
|
|
(g->elem == 0) ? 'X' :(
|
|
(g->elem == 1) ? 'Y' :
|
|
(g->elem == 2) ? 'Z' :' '),
|
|
INT2FLT(code_globals_data[g->offset])
|
|
);
|
|
break;
|
|
}
|
|
case TYPE_FUNCTION: {
|
|
util_debug("ASM", "function %s\n", g->name);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Parses a type, could be global or not depending on the
|
|
* assembly state: global scope with assignments are constants.
|
|
* globals with no assignments are globals. Function body types
|
|
* are locals.
|
|
*/
|
|
static GMQCC_INLINE bool asm_parse_type(const char *skip, size_t line, asm_state *state) {
|
|
if ((strstr(skip, "FLOAT:") != &skip[0]) &&
|
|
(strstr(skip, "VECTOR:") != &skip[0]) &&
|
|
(strstr(skip, "ENTITY:") != &skip[0]) &&
|
|
(strstr(skip, "FIELD:") != &skip[0]) &&
|
|
(strstr(skip, "STRING:") != &skip[0])) return false;
|
|
|
|
/* TODO: determine if constant, global, or local */
|
|
switch (*skip) {
|
|
/* VECTOR */ case 'V': {
|
|
float val1;
|
|
float val2;
|
|
float val3;
|
|
asm_sym sym;
|
|
|
|
char *find = (char*)skip + 7;
|
|
char *name = (char*)skip + 7;
|
|
while (*find == ' ' || *find == '\t') find++;
|
|
|
|
/* constant? */
|
|
if (strchr(find, ',')) {
|
|
/* strip name */
|
|
*strchr((name = util_strdup(find)), ',')='\0';
|
|
/* find data */
|
|
find += strlen(name) + 1;
|
|
while (*find == ' ' || *find == '\t') find++;
|
|
/* valid name */
|
|
if (util_strupper(name) || isdigit(*name)) {
|
|
printf("invalid name for vector variable\n");
|
|
mem_d(name);
|
|
}
|
|
/*
|
|
* Parse all three elements of the vector. This will only
|
|
* pass the first try if we hit a constant, otherwise it's
|
|
* a global.
|
|
*/
|
|
#define PARSE_ELEMENT(X,Y,Z) \
|
|
if (isdigit(*X) || *X == '-'||*X == '+') { \
|
|
bool negated = (*X == '-'); \
|
|
if (negated || *X == '+') { X++; } \
|
|
Y = (negated)?-atof(X):atof(X); \
|
|
X = strchr(X, ','); \
|
|
Z \
|
|
}
|
|
|
|
PARSE_ELEMENT(find, val1, { find ++; while (*find == ' ') { find ++; } });
|
|
PARSE_ELEMENT(find, val2, { find ++; while (*find == ' ') { find ++; } });
|
|
PARSE_ELEMENT(find, val3, { find ++; /* no need to do anything here */ });
|
|
#undef PARSE_ELEMENT
|
|
#define BUILD_ELEMENT(X,Y) \
|
|
sym.type = TYPE_VECTOR; \
|
|
sym.name = util_strdup(name); \
|
|
sym.elem = (X); \
|
|
sym.offset = code_globals_elements; \
|
|
asm_symbols_add(sym); \
|
|
code_globals_add(FLT2INT(Y))
|
|
BUILD_ELEMENT(0, val1);
|
|
BUILD_ELEMENT(1, val2);
|
|
BUILD_ELEMENT(2, val3);
|
|
#undef BUILD_ELEMENT
|
|
mem_d(name);
|
|
} else {
|
|
/* TODO global not constant */
|
|
}
|
|
break;
|
|
}
|
|
/* ENTITY */ case 'E': {
|
|
const char *find = skip + 7;
|
|
while (*find == ' ' || *find == '\t') find++;
|
|
printf("found ENTITY %s\n", find);
|
|
break;
|
|
}
|
|
/* STRING */ case 'S': {
|
|
const char *find = skip + 7;
|
|
while (*find == ' ' || *find == '\t') find++;
|
|
printf("found STRING %s\n", find);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Parses a function: trivial case, handles occurances of duplicated
|
|
* names among other things. Ensures valid name as well, and even
|
|
* internal engine function selection.
|
|
*/
|
|
static GMQCC_INLINE bool asm_parse_func(const char *skip, size_t line, asm_state *state) {
|
|
if (*state == ASM_FUNCTION)
|
|
return false;
|
|
|
|
if (strstr(skip, "FUNCTION:") == &skip[0]) {
|
|
asm_sym sym;
|
|
char *look = util_strdup(skip+10);
|
|
char *copy = look;
|
|
char *name = NULL;
|
|
while (*copy == ' ' || *copy == '\t') copy++;
|
|
|
|
memset(&sym, 0, sizeof(asm_sym));
|
|
|
|
/*
|
|
* Chop the function name out of the string, this allocates
|
|
* a new string.
|
|
*/
|
|
name = util_strchp(copy, strchr(copy, '\0'));
|
|
|
|
/* TODO: failure system, missing name */
|
|
if (!name) {
|
|
printf("expected name on function\n");
|
|
mem_d(copy);
|
|
mem_d(name);
|
|
return false;
|
|
}
|
|
/* TODO: failure system, invalid name */
|
|
if (!isalpha(*name) || util_strupper(name)) {
|
|
printf("invalid identifer for function name\n");
|
|
mem_d(copy);
|
|
mem_d(name);
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Function could be internal function, look for $
|
|
* to determine this.
|
|
*/
|
|
if (strchr(name, ',')) {
|
|
char *find = strchr(name, ',') + 1;
|
|
prog_section_function function;
|
|
prog_section_def def;
|
|
memset(&function, 0, sizeof(prog_section_function));
|
|
memset(&def, 0, sizeof(prog_section_def));
|
|
|
|
/* skip whitespace */
|
|
while (*find == ' ' || *find == '\t')
|
|
find++;
|
|
|
|
if (*find != '$') {
|
|
printf("expected $ for internal function selection, got %s instead\n", find);
|
|
mem_d(copy);
|
|
mem_d(name);
|
|
return false;
|
|
}
|
|
find ++;
|
|
if (!isdigit(*find)) {
|
|
printf("invalid internal identifier, expected valid number\n");
|
|
mem_d(copy);
|
|
mem_d(name);
|
|
return false;
|
|
}
|
|
*strchr(name, ',')='\0';
|
|
|
|
/*
|
|
* Now add the following items to the code system:
|
|
* function
|
|
* definition (optional)
|
|
* global (optional)
|
|
* name
|
|
*/
|
|
function.entry = -atoi(find);
|
|
function.firstlocal = 0;
|
|
function.locals = 0;
|
|
function.profile = 0;
|
|
function.name = code_chars_elements;
|
|
function.file = 0;
|
|
function.nargs = 0;
|
|
def.type = TYPE_FUNCTION;
|
|
def.offset = code_globals_elements;
|
|
def.name = code_chars_elements;
|
|
code_functions_add(function);
|
|
code_defs_add (def);
|
|
code_chars_put (name, strlen(name));
|
|
code_chars_add ('\0');
|
|
sym.type = TYPE_FUNCTION;
|
|
sym.name = util_strdup(name);
|
|
sym.offset = function.entry;
|
|
asm_symbols_add(sym);
|
|
|
|
util_debug("ASM", "added internal function %s to function table\n", name);
|
|
|
|
/*
|
|
* Sanatize the numerical constant used to select the
|
|
* internal function. Must ensure it's all numeric, since
|
|
* atoi can silently drop characters from a string and still
|
|
* produce a valid constant that would lead to runtime problems.
|
|
*/
|
|
if (util_strdigit(find))
|
|
util_debug("ASM", "found internal function %s, -%d\n", name, atoi(find));
|
|
else
|
|
printf("invalid internal function identifier, must be all numeric\n");
|
|
|
|
} else {
|
|
/*
|
|
* The function isn't an internal one. Determine the name and
|
|
* amount of arguments the function accepts by searching for
|
|
* the `#` (pound sign).
|
|
*/
|
|
int args = 0;
|
|
int size = 0;
|
|
char *find = strchr(name, '#');
|
|
char *peek = find;
|
|
|
|
/*
|
|
* Code structures for filling after determining the correct
|
|
* information to add to the code write system.
|
|
*/
|
|
prog_section_function function;
|
|
prog_section_def def;
|
|
memset(&function, 0, sizeof(prog_section_function));
|
|
memset(&def, 0, sizeof(prog_section_def));
|
|
if (find) {
|
|
find ++;
|
|
|
|
/* skip whitespace */
|
|
if (*find == ' ' || *find == '\t')
|
|
find++;
|
|
|
|
/*
|
|
* If the input is larger than eight, it's considered
|
|
* invalid and shouldn't be allowed. The QuakeC VM only
|
|
* allows a maximum of eight arguments.
|
|
*/
|
|
if (*find == '9') {
|
|
printf("invalid number of arguments, must be a valid number from 0-8\n");
|
|
mem_d(copy);
|
|
mem_d(name);
|
|
return false;
|
|
}
|
|
|
|
if (*find != '0') {
|
|
/*
|
|
* if we made it this far we have a valid number for the
|
|
* argument count, so fall through a switch statement and
|
|
* do it.
|
|
*/
|
|
switch (*find) {
|
|
case '8': args++; case '7': args++;
|
|
case '6': args++; case '5': args++;
|
|
case '4': args++; case '3': args++;
|
|
case '2': args++; case '1': args++;
|
|
}
|
|
}
|
|
/*
|
|
* We need to parse the argument size now by determining
|
|
* the argument identifer list used after the amount of
|
|
* arguments.
|
|
*/
|
|
memset(function.argsize, 0, sizeof(function.argsize));
|
|
find ++; /* skip the number */
|
|
while (*find == ' ' || *find == '\t') find++;
|
|
while (size < args) {
|
|
switch (*find) {
|
|
case 'V': case 'v': function.argsize[size]=3; break;
|
|
case 'S': case 's':
|
|
case 'F': case 'f':
|
|
case 'E': case 'e': function.argsize[size]=1; break;
|
|
case '\0':
|
|
printf("missing argument identifer, expected %d\n", args);
|
|
return false;
|
|
default:
|
|
printf("error invalid function argument identifier\n");
|
|
return false;
|
|
}
|
|
size++,find++;
|
|
}
|
|
while (*find == ' ' || *find == '\t') find++;
|
|
if (*find != '\0') {
|
|
printf("too many function argument identifers expected %d\n", args);
|
|
return false;
|
|
}
|
|
} else {
|
|
printf("missing number of argument count in function %s\n", name);
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Now we need to strip the name apart into it's exact size
|
|
* by working in the peek buffer till we hit the name again.
|
|
*/
|
|
if (*peek == '#') {
|
|
peek --; /* '#' */
|
|
peek --; /* number */
|
|
}
|
|
while (*peek == ' ' || *peek == '\t') peek--;
|
|
|
|
/*
|
|
* We're guranteed to be exactly where we need to be in the
|
|
* peek buffer to null terminate and get our name from name
|
|
* without any garbage before or after it.
|
|
*/
|
|
*++peek='\0';
|
|
|
|
/*
|
|
* We got valid function structure information now. Lets add
|
|
* the function to the code writer function table.
|
|
*/
|
|
function.entry = code_statements_elements;
|
|
function.firstlocal = 0;
|
|
function.locals = 0;
|
|
function.profile = 0;
|
|
function.name = code_chars_elements;
|
|
function.file = 0;
|
|
function.nargs = args;
|
|
def.type = TYPE_FUNCTION;
|
|
def.offset = code_globals_elements;
|
|
def.name = code_chars_elements;
|
|
code_functions_add(function);
|
|
code_globals_add (code_statements_elements);
|
|
code_chars_put (name, strlen(name));
|
|
code_chars_add ('\0');
|
|
sym.type = TYPE_FUNCTION;
|
|
sym.name = util_strdup(name);
|
|
sym.offset = function.entry;
|
|
asm_symbols_add(sym);
|
|
|
|
/* update assembly state */
|
|
|
|
*state = ASM_FUNCTION;
|
|
util_debug("ASM", "added context function %s to function table\n", name);
|
|
}
|
|
|
|
mem_d(copy);
|
|
mem_d(name);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static GMQCC_INLINE bool asm_parse_stmt(const char *skip, size_t line, asm_state *state) {
|
|
/*
|
|
* This parses a valid statement in assembly and adds it to the code
|
|
* table to be wrote. This needs to handle correct checking of all
|
|
* statements to ensure the correct amount of operands are passed to
|
|
* the menomic. This must also check for valid function calls (ensure
|
|
* the names selected exist in the program scope) and ensure the correct
|
|
* CALL* is used (depending on the amount of arguments the function
|
|
* is expected to take)
|
|
*/
|
|
enum {
|
|
EXPECT_FUNCTION = 1,
|
|
EXPECT_VARIABLE = 2,
|
|
EXPECT_VALUE = 3
|
|
};
|
|
|
|
char *c = (char*)skip;
|
|
size_t i = 0;
|
|
char expect = 0;
|
|
prog_section_statement s;
|
|
memset(&s, 0, sizeof(prog_section_statement));
|
|
|
|
/*
|
|
* statements are only allowed when inside a function body
|
|
* otherwise the assembly is invalid.
|
|
*/
|
|
if (*state != ASM_FUNCTION)
|
|
return false;
|
|
|
|
/*
|
|
* Skip any possible whitespace, it's not wanted we're searching
|
|
* for an instruction. TODO: recrusive decent parser skip on line
|
|
* entry instead of pre-op.
|
|
*/
|
|
while (*skip == ' ' || *skip == '\t')
|
|
skip++;
|
|
|
|
for (; i < sizeof(asm_instr)/sizeof(*asm_instr); i++) {
|
|
/*
|
|
* Iterate all possible instructions and check if the selected
|
|
* instructure in the input stream `skip` is actually a valid
|
|
* instruction.
|
|
*/
|
|
if (!strncmp(skip, asm_instr[i].m, asm_instr[i].l)) {
|
|
|
|
/*
|
|
* We hit the end of a function scope, retarget the state
|
|
* and add a DONE statement to the statment table.
|
|
*/
|
|
if (i == AINSTR_END) {
|
|
s.opcode = i;
|
|
code_statements_add(s);
|
|
*state = ASM_NULL;
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Check the instruction type to see what sort of data
|
|
* it's expected to have.
|
|
*/
|
|
if (i >= INSTR_CALL0 && i <= INSTR_CALL8)
|
|
expect = EXPECT_FUNCTION;
|
|
else
|
|
expect = EXPECT_VARIABLE;
|
|
|
|
util_debug(
|
|
"ASM",
|
|
"found statement %s expecting: `%s` (%ld operand(s))\n",
|
|
asm_instr[i].m,
|
|
(expect == EXPECT_FUNCTION)?"function name":(
|
|
(expect == EXPECT_VARIABLE)?"variable name":(
|
|
(expect == EXPECT_VALUE ?"value" : "unknown"))),
|
|
asm_instr[i].o
|
|
);
|
|
/*
|
|
* Parse the operands for `i` (the instruction). The order
|
|
* of asm_instr is in the order of the menomic encoding so
|
|
* `i` == menomic encoding.
|
|
*/
|
|
s.opcode = i;
|
|
switch (asm_instr[i].o) {
|
|
/*
|
|
* Each instruction can have from 0-3 operands; and can
|
|
* be used with less or more operands depending on it's
|
|
* selected use.
|
|
*
|
|
* DONE for example can use either 0 operands, or 1 (to
|
|
* emulate the effect of RETURN)
|
|
*
|
|
* TODO: parse operands correctly figure out what it is
|
|
* that the assembly is trying to do, i.e string table
|
|
* lookup, function calls etc.
|
|
*
|
|
* This needs to have a fall state, we start from the
|
|
* end of the string and work backwards.
|
|
*/
|
|
#define OPEATS(X,Y) X##Y
|
|
#define OPCCAT(X,Y) OPEATS(X,Y)
|
|
#define OPLOAD(X,Y) \
|
|
do { \
|
|
util_debug("ASM", "loading operand data ...\n"); \
|
|
if (expect == EXPECT_VARIABLE) { \
|
|
size_t f=0; \
|
|
for (; f<asm_symbols_elements; f++) { \
|
|
if (!strncmp(asm_symbols_data[f].name, (Y), strlen(Y)) && \
|
|
asm_symbols_data[f].type != TYPE_FUNCTION) { \
|
|
(X)=asm_symbols_data[f].offset; \
|
|
goto OPCCAT(foundv, __LINE__); \
|
|
} \
|
|
} \
|
|
printf("no variable named %s\n", (Y)); \
|
|
break; \
|
|
OPCCAT(foundv,__LINE__) : \
|
|
printf("operand loaded for %s\n", (Y)); \
|
|
} else if (expect == EXPECT_FUNCTION) { \
|
|
/* \
|
|
* It's a function call not a variable association with an instruction \
|
|
* these are harder to handle. \
|
|
*/ \
|
|
size_t f=0; \
|
|
if (strchr(Y, ' ')) { \
|
|
*strchr(Y, ' ')='\0'; \
|
|
} \
|
|
for (; f<asm_symbols_elements; f++) { \
|
|
if (!strncmp(asm_symbols_data[f].name, (Y), strlen(Y)) && \
|
|
asm_symbols_data[f].type == TYPE_FUNCTION) { \
|
|
(X)=asm_symbols_data[f].offset; \
|
|
goto OPCCAT(foundf, __LINE__); \
|
|
} \
|
|
} \
|
|
printf("no function named [%s]\n", (Y)); \
|
|
break; \
|
|
OPCCAT(foundf,__LINE__) : \
|
|
printf("operand loaded for [%s]\n", (Y)); \
|
|
} \
|
|
} while (0)
|
|
case 3: { OPLOAD(s.o3.s1,c); break; }
|
|
case 2: { OPLOAD(s.o2.s1,c); break; }
|
|
case 1: {
|
|
while (*c == ' ' || *c == '\t') c++;
|
|
c += asm_instr[i].l;
|
|
while (*c == ' ' || *c == '\t') c++;
|
|
OPLOAD(s.o1.s1, c);
|
|
break;
|
|
}
|
|
#undef OPLOAD
|
|
#undef OPCCAT
|
|
}
|
|
/* add the statement now */
|
|
code_statements_add(s);
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void asm_parse(FILE *fp) {
|
|
char *data = NULL;
|
|
long line = 1; /* current line */
|
|
size_t size = 0; /* size of line */
|
|
asm_state state = ASM_NULL;
|
|
|
|
#define asm_end(x) \
|
|
do { \
|
|
mem_d(data); \
|
|
line ++; \
|
|
util_debug("ASM", x); \
|
|
} while (0); continue
|
|
|
|
while ((data = asm_getline (&size, fp)) != NULL) {
|
|
char *copy = data;
|
|
char *skip = copy;
|
|
while (*copy == ' ' || *copy == '\t') copy++;
|
|
while (*skip != '\n') skip++;
|
|
*skip='\0';
|
|
|
|
if (asm_parse_type(copy, line, &state)){ asm_end("asm_parse_type\n"); }
|
|
if (asm_parse_func(copy, line, &state)){ asm_end("asm_parse_func\n"); }
|
|
if (asm_parse_stmt(copy, line, &state)){ asm_end("asm_parse_stmt\n"); }
|
|
asm_end("asm_parse_white\n");
|
|
}
|
|
#undef asm_end
|
|
asm_dumps();
|
|
asm_clear();
|
|
}
|