quakeforge/libs/gib/gib_parse.c
Brian Koropoff 020a9262f7 Made the parser anal about concatenation usage and fixed several bugs by
running /dev/urandom through carne to catch problems.
2003-02-25 08:04:48 +00:00

692 lines
18 KiB
C

/*
gib_parse.c
GIB parser functions
Copyright (C) 2002 Brian Koropoff
Author: Brian Koropoff
Date: #DATE#
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
static __attribute__ ((unused))
const char rcsid[] = "$Id$";
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include "QF/sys.h"
#include "QF/dstring.h"
#include "QF/va.h"
#include "QF/cmd.h"
#include "QF/cbuf.h"
#include "QF/gib_buffer.h"
#include "QF/gib_process.h"
#include "QF/gib_builtin.h"
#include "QF/gib_function.h"
#include "QF/gib_vars.h"
#include "QF/gib_parse.h"
/*
    GIB_Escaped

    Reports whether the character at index i of str is escaped: that is,
    whether it is directly preceded by an odd number of consecutive
    backslashes.  An even-length run means each backslash escapes its
    neighbour, leaving the character itself unescaped.

    Returns non-zero when escaped, 0 otherwise (including for i <= 0).
*/
inline qboolean
GIB_Escaped (const char *str, int i)
{
    int         backslashes = 0;

    // Walk backwards over the run of backslashes that ends just before i
    while (i > 0 && str[i - 1] == '\\') {
        backslashes++;
        i--;
    }
    // Odd run length: the final backslash applies to the character
    return backslashes % 2;
}
/*
GIB_Parse_Match_*
These are the workhorses of the GIB parser. They iterate
an index variable through a string until an appropriate
matching character is found, calling themselves and their
neighbors recursively to handle sections of string that they
are uninterested in.
FIXME: Make sure everything is calling everything else it might
need to. Make appropriate functions intolerant of newlines.
*/
/*
    GIB_Parse_Match_Dquote

    Called with *i indexing an opening double quote in str.  Scans forward
    for the closing double quote that is not escaped (per GIB_Escaped) and
    is on the same line.

    Returns 0 with *i left on the closing quote.  If a newline or the end
    of the string is reached first, restores *i to the opening quote and
    returns '\"'.
*/
static char
GIB_Parse_Match_Dquote (const char *str, unsigned int *i)
{
    const unsigned int open = *i;
    unsigned int pos;

    // Strings may not span lines, so a newline ends the search as well
    for (pos = open + 1; str[pos] && str[pos] != '\n'; pos++) {
        if (str[pos] == '\"' && !GIB_Escaped (str, pos)) {
            *i = pos;
            return 0;
        }
    }
    *i = open;
    return '\"';
}
/*
    GIB_Parse_Match_Brace

    Called with *i indexing an opening '{' in str.  Advances *i to the
    matching '}', stepping over nested brace pairs and double-quoted
    strings so that braces inside them are not counted.

    Returns 0 with *i on the matching '}'.  If a nested construct is
    unterminated, returns that construct's opening character with *i where
    the nested matcher left it.  If the end of the string is reached,
    restores *i to the opening brace and returns '{'.
*/
char
GIB_Parse_Match_Brace (const char *str, unsigned int *i)
{
    const unsigned int open = *i;
    char        unmatched;

    for (++*i; str[*i] != '\0'; ++*i) {
        switch (str[*i]) {
            case '}':
                return 0;
            case '\"':
                unmatched = GIB_Parse_Match_Dquote (str, i);
                if (unmatched)
                    return unmatched;
                break;
            case '{':
                unmatched = GIB_Parse_Match_Brace (str, i);
                if (unmatched)
                    return unmatched;
                break;
            default:
                break;
        }
    }
    *i = open;
    return '{';
}
/*
    GIB_Parse_Match_Paren

    Called with *i indexing an opening '(' in str.  Advances *i to the
    matching ')', stepping over nested parenthesis pairs and double-quoted
    strings.

    Returns 0 with *i on the matching ')'.  If a nested construct is
    unterminated, returns that construct's opening character with *i where
    the nested matcher left it.  If the end of the string is reached,
    restores *i to the opening parenthesis and returns '('.
*/
char
GIB_Parse_Match_Paren (const char *str, unsigned int *i)
{
    const unsigned int open = *i;
    char        unmatched;

    for (++*i; str[*i] != '\0'; ++*i) {
        switch (str[*i]) {
            case ')':
                return 0;
            case '(':
                unmatched = GIB_Parse_Match_Paren (str, i);
                if (unmatched)
                    return unmatched;
                break;
            case '\"':
                unmatched = GIB_Parse_Match_Dquote (str, i);
                if (unmatched)
                    return unmatched;
                break;
            default:
                break;
        }
    }
    *i = open;
    return '(';
}
/*
    GIB_Parse_Match_Backtick

    Called with *i indexing an opening backtick in str.  Advances *i to the
    next backtick, stepping over double-quoted strings so that a backtick
    inside a string does not terminate the match.

    Returns 0 with *i on the closing backtick.  If a double-quoted string
    is unterminated, returns '\"' with *i on that string's opening quote.
    If the end of the string is reached, restores *i to the opening
    backtick and returns '`'.
*/
char
GIB_Parse_Match_Backtick (const char *str, unsigned int *i)
{
    const unsigned int open = *i;
    char        unmatched;

    for (++*i; str[*i] != '\0'; ++*i) {
        if (str[*i] == '`')
            return 0;
        if (str[*i] != '\"')
            continue;
        // Skip over strings as usual
        unmatched = GIB_Parse_Match_Dquote (str, i);
        if (unmatched)
            return unmatched;
    }
    *i = open;
    return '`';
}
/*
    GIB_Parse_Match_Index

    Called with *i indexing an opening '[' in str.  Advances *i to the
    matching ']', treating nested '[' ... ']' pairs as part of the index.

    Returns 0 with *i on the matching ']'.  If a nested index is
    unterminated, returns '[' with *i on that nested opening bracket.  If
    the end of the string is reached, restores *i to the opening bracket
    and returns '['.
*/
char
GIB_Parse_Match_Index (const char *str, unsigned int *i)
{
    char        c;
    unsigned int n = *i;

    for ((*i)++; str[*i]; (*i)++) {
        if (str[*i] == '[') {
            // The nested test must be a separate statement: after a
            // successful nested match *i sits on the inner ']', and that
            // bracket must not be mistaken for the end of this index.
            if ((c = GIB_Parse_Match_Index (str, i)))
                return c;
        } else if (str[*i] == ']')
            return 0;
    }
    *i = n;
    return '[';
}
/*
    GIB_Parse_Match_Var

    Called with *i indexing the character that introduces a variable
    reference; that character is skipped unconditionally.  Two forms are
    recognised after it:

      {...}        *i ends on the matching '}'
      name[...]    name is a run of alphanumerics and underscores, the
                   bracketed index is optional; *i ends one past the last
                   character consumed

    Returns 0 on success, or the opening character of an unterminated
    brace, quote or index as reported by the nested matcher.
*/
char
GIB_Parse_Match_Var (const char *str, unsigned int *i)
{
    char        unmatched;

    (*i)++;
    // Braced form: a successful match leaves *i on '}', which is neither
    // a name character nor '[', so nothing further can be consumed
    if (str[*i] == '{')
        return GIB_Parse_Match_Brace (str, i);

    while (isalnum ((byte) str[*i]) || str[*i] == '_')
        (*i)++;
    if (str[*i] == '[') {
        unmatched = GIB_Parse_Match_Index (str, i);
        if (unmatched)
            return unmatched;
        // Step past the closing ']'
        (*i)++;
    }
    return 0;
}
qboolean gib_parse_error;
unsigned int gib_parse_error_pos;
const char *gib_parse_error_msg;
static void
GIB_Parse_Error (const char *msg, unsigned int pos)
{
gib_parse_error = true;
gib_parse_error_msg = msg;
gib_parse_error_pos = pos;
}
/*
    GIB_Parse_ErrorMsg

    Returns the message pointer recorded by the most recent
    GIB_Parse_Error call.
*/
const char *
GIB_Parse_ErrorMsg (void)
{
    const char *msg = gib_parse_error_msg;

    return msg;
}
/*
    GIB_Parse_ErrorPos

    Returns the position recorded by the most recent GIB_Parse_Error call.
*/
unsigned int
GIB_Parse_ErrorPos (void)
{
    unsigned int pos = gib_parse_error_pos;

    return pos;
}
/*
    GIB_Parse_Tokens

    Splits one command of program, starting at index *i, into a chain of
    TREE_T_ARG nodes linked through ->next.  Parsing stops at a newline,
    a ';', the end of the string, or a "//" comment (which is skipped up
    to the end of its line).

    program   text being parsed
    i         in: index to start at; out: index where parsing stopped
    pofs      offset added to every position stored in the nodes or
              reported in an error
    embedded  out: chain of embedded command lines collected from the
              arguments; written only when the function succeeds

    Each argument is one of: a {...} program block (parsed recursively
    into ->children), a "..." string (escapes processed), a (...) group,
    or a bare token.  Bare tokens and (...) groups are scanned for
    embedded commands and variables.  For every node ->delim holds the
    opening delimiter (' ' for a bare token) and ->str a copy of the text
    without its delimiters.

    A comma joins an argument to the previous one: the argument after the
    comma gets TREE_A_CONCAT.  A comma is an error before the first
    argument, and directly after a program block or an '@'/'%' expansion;
    those two kinds of argument may not follow a comma either.

    Returns the argument chain, or 0 if nothing was parsed or an error
    occurred (gib_parse_error tells the two apart in most cases).

    NOTE(review): text glued directly in front of "//" (e.g. "foo//bar")
    is dropped along with the comment, because the comment branch jumps to
    DONE before a node is created for it -- confirm this is intended.
*/
static gib_tree_t *
GIB_Parse_Tokens (const char *program, unsigned int *i, unsigned int pofs, gib_tree_t ** embedded)
{
    // c must start out as 0: the ERROR label tests it, and the very first
    // "goto ERROR" (leading comma) happens before anything assigns it
    char        c = 0, delim, *str;
    unsigned int tstart, start;
    gib_tree_t *nodes = 0, *cur, *new, *embs = 0, *tmp;
    gib_tree_t **node = &nodes;
    enum {CAT_NORMAL = 0, CAT_DISALLOW, CAT_CONCAT} cat = CAT_DISALLOW;
    const char *catestr = "Comma found before first argument, nothing to concatenate to.";

    gib_parse_error = false;
    while (1) {
        // Skip whitespace
        while (program[*i] != '\n' && isspace ((byte) program[*i]))
            (*i)++;
        // Check for concatenation, skip comma and any more whitespace
        if (program[*i] == ',') {
            if (cat == CAT_DISALLOW) {
                GIB_Parse_Error(catestr, *i + pofs);
                goto ERROR;
            }
            cat = CAT_CONCAT;
            // Consume the separator here rather than restarting the loop:
            // restarting would run the "else" below on the next pass and
            // reset cat before the following argument could see it.
            // Repeated commas are skipped as one.
            do {
                (*i)++;
            } while (program[*i] == ','
                     || (program[*i] != '\n'
                         && isspace ((byte) program[*i])));
        } else
            cat = CAT_NORMAL;
        // New line/command?
        if (!program[*i] || program[*i] == '\n' || program[*i] == ';')
            break;
        // Save our start position
        start = *i;
        tstart = start + 1;
        delim = program[*i];
        switch (delim) {
            case '{':
                if ((c = GIB_Parse_Match_Brace (program, i)))
                    goto ERROR;
                break;
            case '\"':
                if ((c = GIB_Parse_Match_Dquote (program, i)))
                    goto ERROR;
                break;
            case '(':
                if ((c = GIB_Parse_Match_Paren (program, i)))
                    goto ERROR;
                break;
            default:
                // Find the end of a "normal" token
                delim = ' ';
                tstart = *i;
                for (;
                     program[*i] && !isspace ((byte) program[*i])
                     && program[*i] != ',' && program[*i] != ';'; (*i)++) {
                    if (program[*i] == '{') {
                        if ((c = GIB_Parse_Match_Brace (program, i)))
                            goto ERROR;
                    } else if (program[*i] == '(') {
                        if ((c = GIB_Parse_Match_Paren (program, i)))
                            goto ERROR;
                    } else if (program[*i] == '`') {
                        if ((c = GIB_Parse_Match_Backtick (program, i)))
                            goto ERROR;
                        // Handle comments
                    } else if (program[*i] == '/' && program[*i + 1] == '/') {
                        for ((*i) += 2; program[*i] && program[*i] != '\n';
                             (*i)++);
                        goto DONE;
                    }
                }
        }
        // From here on an error is described by an explicit
        // GIB_Parse_Error call, not by an unmatched delimiter
        c = 0;
        cur = *node = GIB_Tree_New (TREE_T_ARG);
        cur->start = start + pofs;
        cur->end = *i + pofs;
        cur->delim = delim;
        str = calloc (*i - tstart + 1, sizeof (char));
        memcpy (str, program + tstart, *i - tstart);
        // Attach the text to the node right away so that the node owns it
        // on every exit path, including the error exits below
        cur->str = str;
        if (cur->delim == '{') {
            if (cat == CAT_CONCAT) {
                GIB_Parse_Error ("Program blocks may not be concatenated with other arguments.", start + pofs);
                goto ERROR;
            }
            catestr = "Program blocks may not be concatenated with other arguments.";
            cat = CAT_DISALLOW;
            // Try to parse sub-program
            if (!(new = GIB_Parse_Lines (str, tstart + pofs)))
                goto ERROR;
            cur->children = new;
            // Check for embedded commands/variables
        } else if (cur->delim == ' ' || cur->delim == '(') {
            if (!
                (cur->children =
                 GIB_Parse_Embedded (str, tstart + pofs, &new))) {
                // There could be no embedded elements, so check for a real
                // error
                if (gib_parse_error)
                    goto ERROR;
            } else {
                // Link/set flags
                cur->flags |= TREE_A_EMBED;
                // Add any embedded commands to top of chain
                if (new) {
                    for (tmp = new; tmp->next; tmp = tmp->next);
                    tmp->next = embs;
                    embs = new;
                }
            }
            // Check for array splitting
            // Concatenating this onto something else is non-sensical
            if (cur->delim == ' ' && (str[0] == '@' || str[0] == '%')) {
                if (cat == CAT_CONCAT) {
                    GIB_Parse_Error ("Variable expansions may not be concatenated with other arguments.", start + pofs);
                    goto ERROR;
                }
                catestr = "Variable expansions may not be concatenated with other arguments.";
                cat = CAT_DISALLOW;
                cur->flags |= TREE_A_EXPAND;
            }
            // We can handle escape characters now
        } else if (cur->delim == '\"')
            GIB_Process_Escapes (str);
        if (cat == CAT_CONCAT)
            cur->flags |= TREE_A_CONCAT;
        // Nothing left to parse?
        if (!program[*i])
            break;
        // On non-normal tokens, move past the delimeter
        if (cur->delim != ' ')
            (*i)++;
        node = &cur->next;
    }
  DONE:
    *embedded = embs;
    return nodes;
  ERROR:
    if (c)
        GIB_Parse_Error (va ("Could not find match for '%c'.", c), *i + pofs);
    if (nodes)
        GIB_Tree_Free_Recursive (nodes);
    // The embedded lines collected so far are never handed to the caller
    // on failure, so release them here
    if (embs)
        GIB_Tree_Free_Recursive (embs);
    return 0;
}
/*
    GIB_Parse_Semantic_Preprocess

    Rewrites a freshly parsed command line whose first argument is a
    control keyword into a flat run of lines linked through ->next and
    tied together with ->jump pointers:

      if / ifnot   the line becomes TREE_T_COND (TREE_L_NOT for "ifnot"),
                   its {block} is spliced in after it, and an optional
                   "else" part follows a TREE_T_JUMPPLUS node.  An "else"
                   followed by a plain command is turned into a new line
                   and examined again, which is how "else if" chains are
                   handled.
      while        the line becomes TREE_T_COND, its {block} is spliced in
                   and closed by a TREE_T_JUMP back to the line.
      for          a TREE_T_FORNEXT node is added after the line, the
                   final {block} is spliced in after that and closed by a
                   TREE_T_JUMP back to the TREE_T_FORNEXT node.
      X = ...      the line becomes TREE_T_ASSIGN.

    Inside loop bodies "continue" lines become TREE_T_JUMP to the loop
    start and "break" lines become TREE_T_JUMPPLUS to the loop exit.
    Blocks that are spliced in are detached from their argument node
    (->children set to 0) so that they are referenced from one place only.

    line must have at least one argument in ->children.

    Returns the last line of the resulting run; the caller links whatever
    comes next to its ->next.  On a malformed statement GIB_Parse_Error is
    called and the line being examined is returned.
*/
static gib_tree_t *
GIB_Parse_Semantic_Preprocess (gib_tree_t * line)
{
    gib_tree_t *p, *start = line;

    // line->children has to be tested first: once an "else {...}" block
    // has been spliced in, line is the last line of that block, which can
    // be a jump node without any arguments (e.g. the block ends in a loop)
    while (line->children
           && (!strcmp (line->children->str, "if")
               || !strcmp (line->children->str, "ifnot"))) {
        // Sanity checking
        if (!line->children->next || !line->children->next->next) {
            GIB_Parse_Error ("Not enough arguments to 'if' statement.",
                             line->start);
            return line;
        } else if (!line->children->next->next->children
                   || line->children->next->next->delim != '{') {
            GIB_Parse_Error
                ("First program block in 'if' statement not enclosed in braces or invalid.",
                 line->start);
            return line;
        } else if (line->flags & TREE_L_EMBED) {
            GIB_Parse_Error
                ("'if' statements may not be used in embedded commands.",
                 line->start);
            return line;
        }
        // Set as conditional
        line->type = TREE_T_COND;
        // "ifnot" has a character at index 2 where "if" has its terminator
        if (line->children->str[2])
            line->flags |= TREE_L_NOT;
        // Save our spot
        p = line;
        // Move subprogram inline
        line->next = line->children->next->next->children;
        line->children->next->next->children = 0;
        // Find end of subprogram
        while (line->next)
            line = line->next;
        // Handle "else"
        if (p->children->next->next->next
            && !strcmp (p->children->next->next->next->str, "else")) {
            // Sanity checking
            if (!p->children->next->next->next->next) {
                GIB_Parse_Error
                    ("'if' statement contains 'else' but no secondary program block or command.",
                     line->start);
                return line;
            }
            // On 'true' first block must jump past this
            // We will figure out jump target later
            line->next = GIB_Tree_New (TREE_T_JUMPPLUS);
            line = line->next;
            // Jump to else block on 'false'
            p->jump = line;
            // Is "else" followed by a subprogram?
            if (p->children->next->next->next->next->delim == '{') {
                // Move subprogram inline
                line->next = p->children->next->next->next->next->children;
                p->children->next->next->next->next->children = 0;
                while (line->next)
                    line = line->next;
            } else {
                // Push rest of tokens into a new line
                line->next = GIB_Tree_New (TREE_T_CMD);
                line->next->children = p->children->next->next->next->next;
                p->children->next->next->next->next = 0;
                line = line->next;
            }
        } else {
            // Jump past block on 'false'
            p->jump = line;
            break;              // Don't touch if statements in the sub program
        }
    }
    // Now we know exit point from if-else if chain, set our jumps
    while (start) {
        if (start->type == TREE_T_JUMPPLUS && !start->jump)
            start->jump = line;
        start = start->next;
    }
    // Nothing expanded from a line remains, exit now
    if (!line->children)
        return line;
    // If we have a while loop, handle that
    if (!strcmp (line->children->str, "while")) {
        // Sanity checks
        if (!line->children->next || !line->children->next->next) {
            GIB_Parse_Error ("Not enough arguments to 'while' statement.",
                             line->start);
            return line;
        } else if (!line->children->next->next->children
                   || line->children->next->next->delim != '{') {
            GIB_Parse_Error
                ("Program block in 'while' statement not enclosed in braces or invalid.",
                 line->start);
            return line;
        } else if (line->flags & TREE_L_EMBED) {
            GIB_Parse_Error
                ("'while' statements may not be used in embedded commands.",
                 line->start);
            return line;
        }
        // Set conditional flag
        line->type = TREE_T_COND;
        // Save our spot
        p = line;
        // Move subprogram inline
        line->next = line->children->next->next->children;
        line->children->next->next->children = 0;
        // Find end of subprogram, converting "continue" and "break" lines
        // that do not already have a jump target on the way
        for (; line->next; line = line->next)
            if (!line->jump && line->children) {
                if (!strcmp (line->children->str, "continue")) {
                    line->type = TREE_T_JUMP;
                    line->jump = p;
                } else if (!strcmp (line->children->str, "break"))
                    line->type = TREE_T_JUMPPLUS;
            }
        // Close the loop with a jump back to the condition
        line->next = GIB_Tree_New (TREE_T_JUMP);
        line->next->jump = p;
        line = line->next;
        // Mark jump point out of loop
        p->jump = line;
        // Set jumps out of loop for "break" commands;
        while (p) {
            if (p->type == TREE_T_JUMPPLUS && !p->jump)
                p->jump = line;
            p = p->next;
        }
    } else if (!strcmp (line->children->str, "for")) {
        gib_tree_t *tmp;

        // Sanity checks: for <var> in <at least one more argument> ...
        if (!line->children->next || !line->children->next->next
            || strcmp (line->children->next->next->str, "in")
            || !line->children->next->next->next
            || !line->children->next->next->next->next) {
            GIB_Parse_Error ("Malformed 'for' statement.", line->start);
            return line;
        }
        // Find last token in line (contains program block)
        for (tmp = line->children->next->next->next->next; tmp->next;
             tmp = tmp->next);
        // More sanity
        if (tmp->delim != '{' || !tmp->children) {
            GIB_Parse_Error
                ("Program block in 'for' statement not enclosed in braces or invalid.",
                 line->start);
            return line;
        }
        // Add instruction to fetch next argument (this is the true loop start)
        line->next = GIB_Tree_New (TREE_T_FORNEXT);
        line = line->next;
        p = line;
        // Move subprogram inline
        line->next = tmp->children;
        tmp->children = 0;
        // Find end of subprogram, converting "continue" and "break" lines
        // that do not already have a jump target on the way
        for (; line->next; line = line->next)
            if (!line->jump && line->children) {
                if (!strcmp (line->children->str, "continue")) {
                    line->type = TREE_T_JUMP;
                    line->jump = p;
                } else if (!strcmp (line->children->str, "break"))
                    line->type = TREE_T_JUMPPLUS;
            }
        // Close the loop with a jump back to the fetch instruction
        line->next = GIB_Tree_New (TREE_T_JUMP);
        line->next->jump = p;
        line = line->next;
        // Mark jump point out of loop
        p->jump = line;
        // Mark jump point out of loop for break command
        while (p) {
            if (p->type == TREE_T_JUMPPLUS && !p->jump)
                p->jump = line;
            p = p->next;
        }
    } else if (line->children->next && line->children->next->delim == ' ' && !strcmp (line->children->next->str, "="))
        line->type = TREE_T_ASSIGN;
    return line;
}
/*
    GIB_Parse_Lines

    Parses a whole program text into a chain of lines linked through
    ->next.

    program  text to parse
    pofs     offset added to every position stored in the tree

    Whitespace and ';' between commands are skipped.  Each command is
    tokenized with GIB_Parse_Tokens and wrapped in a TREE_T_CMD line that
    carries a copy of the command text.  Embedded command lines found in
    the arguments are placed in the chain ahead of the line that uses
    them, and the line is then passed through
    GIB_Parse_Semantic_Preprocess.

    Returns the first line of the chain, or 0 if the text produced no
    lines.  As soon as gib_parse_error is set, everything built so far is
    freed and 0 is returned.
*/
gib_tree_t *
GIB_Parse_Lines (const char *program, unsigned int pofs)
{
    unsigned int pos = 0, begin;
    gib_tree_t *head = 0, **link = &head;
    gib_tree_t *cmd, *args, *emb, *last;
    char       *text;

    for (;;) {
        // Skip blank space and empty statements between commands
        while (program[pos] == ';' || isspace ((byte) program[pos]))
            pos++;
        if (program[pos] == '\0')
            return head;

        begin = pos;
        args = GIB_Parse_Tokens (program, &pos, pofs, &emb);
        if (args) {
            // Wrap the arguments in a command line covering [begin, pos)
            cmd = GIB_Tree_New (TREE_T_CMD);
            cmd->delim = '\n';
            text = calloc (pos - begin + 1, sizeof (char));
            memcpy (text, program + begin, pos - begin);
            cmd->str = text;
            cmd->start = begin + pofs;
            cmd->end = pos + pofs;
            cmd->children = args;
            if (emb) {
                // Embedded commands go into the chain before the line
                // that refers to them
                *link = emb;
                for (last = emb; last->next; last = last->next);
                last->next = cmd;
            } else
                *link = cmd;
            // Preprocessing may append lines; continue after the last one
            link = &(GIB_Parse_Semantic_Preprocess (cmd))->next;
        }
        if (gib_parse_error) {
            if (head)
                GIB_Tree_Free_Recursive (head);
            return 0;
        }
    }
}
/*
    GIB_Parse_Embedded

    Scans the text of one argument for embedded commands (`...` or
    $(...)) and variable references ($name, #name, ${...}, #{...}, with an
    optional [index] after a plain name).

    program   argument text to scan
    pofs      offset of program within the enclosing text; added to the
              positions stored in embedded command lines and to error
              positions
    embedded  out: chain of TREE_T_CMD lines (flagged TREE_L_EMBED), one
              per embedded command, preceded by any commands nested
              inside it.  Set to 0 if there are none, and on failure.

    Returns a chain of TREE_T_META nodes, one per embedded element, in
    order of appearance.  Their ->start/->end are indices into program
    (pofs is not added).  For variables ->delim is '$' or '#' and ->str
    the variable name; TREE_A_EMBED is set when the name itself contains
    '$' or '#'.  For commands ->delim is '`'.

    Returns 0 when there are no embedded elements or on error; check
    gib_parse_error to tell the two apart.  On failure everything
    allocated by this call, including the chain in *embedded, is released.
*/
gib_tree_t *
GIB_Parse_Embedded (const char *program, unsigned int pofs, gib_tree_t ** embedded)
{
    unsigned int i, n, t;
    char        c, d, *str;
    gib_tree_t *lines = 0, **line = &lines, *cur, *tokens, *emb, *tmp;
    unsigned int start, end;

    gib_parse_error = false;
    *embedded = 0;
    for (i = 0; program[i]; i++) {
        if (program[i] == '`' || (program[i] == '$' && program[i + 1] == '(')) {
            // Extract the embedded command
            start = i;
            if (program[i] == '`') {
                n = i + 1;
                if ((c = GIB_Parse_Match_Backtick (program, &i)))
                    goto ERROR;
            } else {
                // Step onto the '(' so the matcher starts at the opener
                n = ++i + 1;
                if ((c = GIB_Parse_Match_Paren (program, &i)))
                    goto ERROR;
            }
            end = i + 1;
            // Construct the actual line to be executed
            cur = GIB_Tree_New (TREE_T_CMD);
            cur->flags |= TREE_L_EMBED;
            cur->delim = '`';
            str = calloc (i - n + 1, sizeof (char));
            memcpy (str, program + n, i - n);
            cur->str = str;
            cur->start = start + pofs;
            cur->end = end + pofs;
            c = 0;
            t = 0;
            if (!
                (tokens =
                 GIB_Parse_Tokens (cur->str, &t, start + pofs, &emb))) {
                // cur is not part of any chain yet, so nothing else would
                // release it
                GIB_Tree_Free_Recursive (cur);
                goto ERROR;
            }
            cur->children = tokens;
            GIB_Parse_Semantic_Preprocess (cur)->next = *embedded;
            // Did this have embedded commands of it's own?
            // (Linked before the error check below so that, on failure,
            // everything is reachable from *embedded and gets released.)
            if (emb) {
                // Link them in first
                for (tmp = emb; tmp->next; tmp = tmp->next);
                tmp->next = cur;
                *embedded = emb;
            } else
                *embedded = cur;
            if (gib_parse_error)
                goto ERROR;
            // Create a representative child node for GIB_Process_Embedded to
            // use
            cur = GIB_Tree_New (TREE_T_META);
            cur->delim = '`';
            // Save start/end indices
            cur->start = start;
            cur->end = end;
            *line = cur;
            line = &cur->next;
            // Check for variable substitution
        } else if (program[i] == '$' || program[i] == '#') {
            // Extract variable name
            start = i;
            end = 0;
            d = program[i];
            if (program[i + 1] == '{') {
                // Braced name: it starts after the '{', and the closing
                // '}' belongs to the reference
                n = i + 2;
                end++;
            } else
                n = i + 1;
            if ((c = GIB_Parse_Match_Var (program, &i)))
                goto ERROR;
            end += i;
            cur = GIB_Tree_New (TREE_T_META);
            cur->delim = d;
            str = calloc (i - n + 1, sizeof (char));
            memcpy (str, program + n, i - n);
            cur->str = str;
            // Can we use the name as is, or must processing be done at
            // runtime?
            if (strchr (str, '$') || strchr (str, '#'))
                cur->flags |= TREE_A_EMBED;
            // Save start/end indices
            cur->start = start;
            cur->end = end;
            *line = cur;
            line = &cur->next;
            // Don't skip anything important: for a plain name i is already
            // one past the reference and the loop is about to advance it
            if (program[n - 1] != '{')
                i--;
        }
    }
    return lines;
  ERROR:
    if (c)
        GIB_Parse_Error (va ("Could not find match for '%c'.", c), i + pofs);
    if (lines)
        GIB_Tree_Free_Recursive (lines);
    // Callers receive 0 and have no reason to look at a partial chain
    if (*embedded) {
        GIB_Tree_Free_Recursive (*embedded);
        *embedded = 0;
    }
    return 0;
}