/*
	gib_parse.c

	GIB parser functions

	Copyright (C) 2002 Brian Koropoff

	Author: Brian Koropoff
	Date: #DATE#

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

	See the GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to:

		Free Software Foundation, Inc.
		59 Temple Place - Suite 330
		Boston, MA  02111-1307, USA

*/

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

static __attribute__ ((unused))
const char  rcsid[] = "$Id$";

#include <ctype.h>
#include <string.h>
#include <stdlib.h>

#include "QF/sys.h"
#include "QF/dstring.h"
#include "QF/va.h"
#include "QF/cmd.h"
#include "QF/cbuf.h"
#include "QF/gib_buffer.h"
#include "QF/gib_process.h"
#include "QF/gib_builtin.h"
#include "QF/gib_function.h"
#include "QF/gib_vars.h"
#include "QF/gib_parse.h"


/*
	GIB_Escaped

	Reports whether the character at index i of str is escaped, i.e.
	whether it is directly preceded by an odd number of consecutive
	backslashes.  With an even count (including zero) every backslash in
	the run is itself escaped, so the character is not.
*/
inline qboolean
GIB_Escaped (const char *str, int i)
{
	int         slashes = 0;

	// Count the run of backslashes that ends immediately before index i
	while (i > 0 && str[i - 1] == '\\') {
		slashes++;
		i--;
	}
	return slashes & 1;
}

/*
	GIB_Parse_Match_*
	
	These are the workhorses of the GIB parser.  They iterate
	an index variable through a string until an appropriate
	matching character is found, calling themselves and their
	neighbors recursively to handle sections of string that they
	are uninterested in.
	
	FIXME: Make sure everything is calling everything else it might
	need to.  Make appropriate functions intolerant of newlines.
*/

/*
	GIB_Parse_Match_Dquote

	On entry *i indexes an opening double quote in str.  Searches the rest
	of the current line for the closing double quote, ignoring quotes that
	GIB_Escaped reports as escaped.

	Returns 0 with *i on the closing quote.  If a newline or the end of
	the string is reached first, returns '\"' with *i restored to its
	entry value.
*/
static char
GIB_Parse_Match_Dquote (const char *str, unsigned int *i)
{
	const unsigned int open = *i;
	unsigned int pos;

	for (pos = open + 1; str[pos] && str[pos] != '\n'; pos++) {
		if (str[pos] == '\"' && !GIB_Escaped (str, pos)) {
			*i = pos;
			return 0;
		}
	}
	*i = open;
	return '\"';
}

/*
	GIB_Parse_Match_Brace

	On entry *i indexes an opening '{' in str.  Searches for the matching
	'}', stepping over double-quoted strings and nested brace pairs.

	Returns 0 with *i on the matching '}'.  If a nested string or brace
	cannot be matched, its delimiter is returned and *i is left where the
	nested matcher put it.  If the end of the string is reached, returns
	'{' with *i restored to its entry value.
*/
char
GIB_Parse_Match_Brace (const char *str, unsigned int *i)
{
	const unsigned int open = *i;
	char        err;

	while (str[++(*i)]) {
		switch (str[*i]) {
			case '\"':
				err = GIB_Parse_Match_Dquote (str, i);
				if (err)
					return err;
				break;
			case '{':
				err = GIB_Parse_Match_Brace (str, i);
				if (err)
					return err;
				break;
			case '}':
				return 0;
			default:
				break;
		}
	}
	*i = open;
	return '{';
}

/*
	GIB_Parse_Match_Paren

	On entry *i indexes an opening '(' in str.  Searches for the matching
	')', stepping over nested parentheses and double-quoted strings.

	Returns 0 with *i on the matching ')'.  If a nested parenthesis or
	string cannot be matched, its delimiter is returned and *i is left
	where the nested matcher put it.  If the end of the string is reached,
	returns '(' with *i restored to its entry value.
*/
char
GIB_Parse_Match_Paren (const char *str, unsigned int *i)
{
	const unsigned int open = *i;
	char        err = 0;

	for (++(*i); str[*i]; ++(*i)) {
		const char  ch = str[*i];

		if (ch == ')')
			return 0;
		if (ch == '(')
			err = GIB_Parse_Match_Paren (str, i);
		else if (ch == '\"')
			err = GIB_Parse_Match_Dquote (str, i);
		// err can only be non-zero right after a failed nested match
		if (err)
			return err;
	}
	*i = open;
	return '(';
}

/*
	GIB_Parse_Match_Backtick

	On entry *i indexes an opening backtick in str.  Searches for the next
	backtick, stepping over double-quoted strings.

	Returns 0 with *i on the closing backtick.  If a quoted string cannot
	be matched, '\"' is returned and *i is left where the string matcher
	put it.  If the end of the string is reached, returns '`' with *i
	restored to its entry value.
*/
char
GIB_Parse_Match_Backtick (const char *str, unsigned int *i)
{
	const unsigned int open = *i;

	while (str[++(*i)]) {
		if (str[*i] == '`')
			return 0;
		if (str[*i] == '\"') {
			// Quoted text is skipped as a unit
			char        err = GIB_Parse_Match_Dquote (str, i);

			if (err)
				return err;
		}
	}
	*i = open;
	return '`';
}

/*
	GIB_Parse_Match_Index

	On entry *i indexes an opening '[' in str.  Searches for the matching
	']', recursing on every nested '[' so that inner index expressions are
	skipped as a unit.

	Returns 0 with *i on the matching ']'.  If the end of the string is
	reached first, returns '[' with *i set to the position of the '[' that
	could not be matched.
*/
char
GIB_Parse_Match_Index (const char *str, unsigned int *i)
{
	char        c;
	unsigned int n = *i;

	for ((*i)++; str[*i]; (*i)++) {
		if (str[*i] == '[') {
			// A successful nested match leaves *i on the inner ']'.  That
			// character must not be tested again here, or it would be
			// mistaken for the bracket closing this level.
			if ((c = GIB_Parse_Match_Index (str, i)))
				return c;
		} else if (str[*i] == ']')
			return 0;
	}
	*i = n;
	return '[';
}

/*
	GIB_Parse_Match_Var

	On entry *i indexes the character that introduces a variable reference
	(GIB_Parse_Embedded calls this on '$' and '#').  Advances *i over the
	reference that follows: either a brace-enclosed name, or a run of
	alphanumerics and underscores optionally followed by a bracketed
	index.

	Returns 0 on success.  For a braced name *i is left on the closing
	'}'; otherwise *i is left one past the last character consumed.  If a
	brace or bracket cannot be matched, the delimiter reported by the
	corresponding matcher is returned.
*/
char
GIB_Parse_Match_Var (const char *str, unsigned int *i)
{
	char        err;

	// Step over the introducing character
	(*i)++;

	if (str[*i] == '{') {
		err = GIB_Parse_Match_Brace (str, i);
		if (err)
			return err;
	}
	// Plain name characters
	while (isalnum ((byte) str[*i]) || str[*i] == '_')
		(*i)++;
	// Optional index expression
	if (str[*i] == '[') {
		err = GIB_Parse_Match_Index (str, i);
		if (err)
			return err;
		(*i)++;
	}
	return 0;
}

// Parse error state.  gib_parse_error is set by GIB_Parse_Error and cleared
// at the start of GIB_Parse_Tokens and GIB_Parse_Embedded.
qboolean    gib_parse_error;
// Position passed to the most recent GIB_Parse_Error call
unsigned int gib_parse_error_pos;
// Message pointer passed to the most recent GIB_Parse_Error call; the text
// is not copied
const char *gib_parse_error_msg;

/*
	GIB_Parse_Error

	Records a parse error: raises gib_parse_error and remembers msg and
	pos for GIB_Parse_ErrorMsg and GIB_Parse_ErrorPos.  Only the pointer
	msg is stored; the text it refers to is not copied.
*/
static void
GIB_Parse_Error (const char *msg, unsigned int pos)
{
	gib_parse_error_pos = pos;
	gib_parse_error_msg = msg;
	gib_parse_error = true;
}

/*
	GIB_Parse_ErrorMsg

	Returns the message recorded by the most recent parse error.
*/
const char *
GIB_Parse_ErrorMsg (void)
{
	const char *msg = gib_parse_error_msg;

	return msg;
}

/*
	GIB_Parse_ErrorPos

	Returns the position recorded by the most recent parse error.
*/
unsigned int
GIB_Parse_ErrorPos (void)
{
	unsigned int pos = gib_parse_error_pos;

	return pos;
}

// FIXME: Concatenation in stupid circumstances should generate errors

/*
	GIB_Parse_Tokens

	Splits one line of program, starting at index *i, into a linked list
	of TREE_T_ARG nodes.  Scanning stops at a newline or ';' that is not
	inside a delimited token, at a "//" comment, or at the end of the
	string; *i is left wherever scanning stopped.

	program   text being tokenized
	i         in/out index into program
	pofs      offset added to indices into program when recording node
	          and error positions
	embedded  on success, receives the chain of embedded command lines
	          gathered from the arguments by GIB_Parse_Embedded (0 if
	          there were none); it is not written when an error occurs

	Returns the first argument node.  Returns 0 both when the line holds
	no arguments and when parsing failed; gib_parse_error, which is
	cleared on entry, tells the two cases apart.
*/
static gib_tree_t *
GIB_Parse_Tokens (const char *program, unsigned int *i, unsigned int pofs, gib_tree_t ** embedded)
{
	char        c = 0, delim, *str;
	unsigned int tstart, start;
	gib_tree_t *nodes = 0, *cur, *new, *embs = 0, *tmp;
	gib_tree_t **node = &nodes;
	// Concatenation state for the token about to be parsed.  While it is
	// CAT_DISALLOW a comma is an error, reported with the text in catestr.
	enum {CAT_NORMAL = 0, CAT_DISALLOW, CAT_CONCAT} cat = CAT_DISALLOW;
	const char *catestr = "Comma found before first argument, nothing to concatenate to.";

	gib_parse_error = false;

	while (1) {
		// Skip whitespace
		while (program[*i] != '\n' && isspace ((byte) program[*i]))
			(*i)++;
		// Check for concatenation, skip comma and any more whitespace
		if (program[*i] == ',') {
			if (cat == CAT_DISALLOW) {
				GIB_Parse_Error(catestr, *i + pofs);
				goto ERROR;
			}
			cat = CAT_CONCAT;
			(*i)++;
			while (program[*i] != '\n' && isspace ((byte) program[*i]))
				(*i)++;
			if (program[*i] == ',') {
				GIB_Parse_Error("Double comma error.", *i+pofs);
				goto ERROR;
			}
		} else
			cat = CAT_NORMAL;
		// New line/command?
		if (!program[*i] || program[*i] == '\n' || program[*i] == ';')
			break;
		// Save our start position
		// For delimited tokens the text starts after the opening delimiter
		start = *i;
		tstart = start + 1;
		delim = program[*i];
		// On success each matcher leaves *i on the closing delimiter; on
		// failure c holds the delimiter that could not be matched
		switch (delim) {
			case '{':
				if ((c = GIB_Parse_Match_Brace (program, i)))
					goto ERROR;
				break;
			case '\"':
				if ((c = GIB_Parse_Match_Dquote (program, i)))
					goto ERROR;
				break;
			case '(':
				if ((c = GIB_Parse_Match_Paren (program, i)))
					goto ERROR;
				break;
			default:
				// Find the end of a "normal" token
				// Normal tokens are recorded with a delim of ' ' and their
				// text includes the first character
				delim = ' ';
				tstart = *i;
				for (;
					 program[*i] && !isspace ((byte) program[*i])
					 && program[*i] != ',' && program[*i] != ';'; (*i)++) {
					if (program[*i] == '{') {
						if ((c = GIB_Parse_Match_Brace (program, i)))
							goto ERROR;
					} else if (program[*i] == '(') {
						if ((c = GIB_Parse_Match_Paren (program, i)))
							goto ERROR;
					} else if (program[*i] == '`') {
						if ((c = GIB_Parse_Match_Backtick (program, i)))
							goto ERROR;
						// Handle comments
					} else if (program[*i] == '/' && program[*i + 1] == '/') {
						// Skip to the end of the line and stop parsing.
						// NOTE(review): no node is created for any token
						// text that directly precedes the "//", so that
						// text is dropped -- confirm this is intended.
						for ((*i) += 2; program[*i] && program[*i] != '\n';
							 (*i)++);
						goto DONE;
					}
				}
		}
		// Matching succeeded; make sure the ERROR path below does not
		// report a missing delimiter for later failures
		c = 0;
		cur = *node = GIB_Tree_New (TREE_T_ARG);
		cur->start = start + pofs;
		cur->end = *i + pofs;
		cur->delim = delim;
		// Copy the token text, without its enclosing delimiters.
		// NOTE(review): the calloc result is not checked before use.
		str = calloc (*i - tstart + 1, sizeof (char));
		cur->str = str;
		memcpy (str, program + tstart, *i - tstart);
		if (cur->delim == '{') {
			if (cat == CAT_CONCAT) {
				GIB_Parse_Error ("Program blocks may not be concatenated with other arguments.", start + pofs);
				goto ERROR;
			}
			// A comma directly after this token is rejected as well
			catestr = "Program blocks may not be concatenated with other arguments.";
			cat = CAT_DISALLOW;
			// Try to parse sub-program
			// NOTE(review): GIB_Parse_Lines also returns 0 for a block that
			// contains no lines, without recording an error; this function
			// then returns 0 with gib_parse_error still false -- confirm.
			if (!(new = GIB_Parse_Lines (str, tstart + pofs)))
				goto ERROR;
			cur->children = new;
			// Check for embedded commands/variables
		} else if (cur->delim == ' ' || cur->delim == '(') {
			if (!
				(cur->children =
				 GIB_Parse_Embedded (str, tstart + pofs, &new))) {
				// There could be no embedded elements, so check for a real
				// error
				if (gib_parse_error)
					goto ERROR;
			} else {
				// Link/set flags
				cur->flags |= TREE_A_EMBED;
				// Add any embedded commands to top of chain
				if (new) {
					for (tmp = new; tmp->next; tmp = tmp->next);
					tmp->next = embs;
					embs = new;
				}
			}
			// Check for array splitting
			// Concatenating this onto something else is non-sensical
			if (cur->delim == ' ' && (str[0] == '@' || str[0] == '%')) {
				if (cat == CAT_CONCAT) {
					GIB_Parse_Error ("Variable expansions may not be concatenated with other arguments.", start + pofs);
					goto ERROR;
				}
				catestr = "Variable expansions may not be concatenated with other arguments.";
				cat = CAT_DISALLOW;
				cur->flags |= TREE_A_EXPAND;
			}
			// We can handle escape characters now
		} else if (cur->delim == '\"')
			GIB_Process_Escapes (str);

		if (cat == CAT_CONCAT)
			cur->flags |= TREE_A_CONCAT;
		// Nothing left to parse?
		if (!program[*i])
			break;
		// On non-normal tokens, move past the delimiter
		if (cur->delim != ' ')
			(*i)++;
		node = &cur->next;
	}
  DONE:
	*embedded = embs;
	return nodes;
  ERROR:
	// A non-zero c means a delimiter matcher failed and no error has been
	// recorded for it yet
	if (c)
		GIB_Parse_Error (va ("Could not find match for '%c'.", c), *i + pofs);
	if (nodes)
		GIB_Tree_Unref (&nodes);
	if (embs)
		GIB_Tree_Unref (&embs);
	return 0;
}

/*
	GIB_Parse_Semantic_Preprocess

	Rewrites a freshly tokenized command line in place when its first
	token names a control structure:

	  if / ifnot   the line becomes TREE_T_COND and its program block is
	               moved inline after it.  For an "else", a
	               TREE_T_JUMPPLUS node is inserted, followed by the else
	               block or by the remaining tokens as a new command line
	               (which allows "else if" chains).
	  while / for  the loop body is moved inline, "continue" and "break"
	               lines in it are turned into jumps, and a TREE_T_JUMP
	               back to the loop start is appended.
	  a = ...      a line whose second token is an unquoted,
	               non-concatenated "=" becomes TREE_T_ASSIGN.

	Problems are reported through GIB_Parse_Error, so callers must check
	gib_parse_error afterwards.

	Returns the last line of the (possibly expanded) sequence, so that the
	caller can keep linking from its next pointer.
*/
static gib_tree_t *
GIB_Parse_Semantic_Preprocess (gib_tree_t * line)
{
	gib_tree_t *p, *start = line;

	// If second token is concatenated, then the first can't possibly mean anything
	if (line->children->next && line->children->next->flags & TREE_A_CONCAT)
		return line;
	// Once an "else { ... }" block has been inlined, line is the last line
	// of that block.  That may be a jump node without tokens (for example
	// when the block ends in a loop), so check before looking at its text.
	while (line->children
		   && (!strcmp (line->children->str, "if")
			   || !strcmp (line->children->str, "ifnot"))) {
		// Sanity checking
		if (!line->children->next || !line->children->next->next) {
			GIB_Parse_Error ("Not enough arguments to 'if' statement.",
							 line->start);
			return line;
		} else if (!line->children->next->next->children
				   || line->children->next->next->delim != '{') {
			GIB_Parse_Error
				("First program block in 'if' statement not enclosed in braces or invalid.",
				 line->start);
			return line;
		} else if (line->flags & TREE_L_EMBED) {
			GIB_Parse_Error
				("'if' statements may not be used in embedded commands.",
				 line->start);
			return line;
		}
		// Set as conditional
		line->type = TREE_T_COND;
		// "ifnot" is longer than "if", so a third character means negation
		if (line->children->str[2])
			line->flags |= TREE_L_NOT;
		// Save our spot
		p = line;
		// Move subprogram inline
		line->next = line->children->next->next->children;
		line->children->next->next->children = 0;
		// Find end of subprogram
		while (line->next)
			line = line->next;
		// Handle "else"
		if (p->children->next->next->next
			&& !strcmp (p->children->next->next->next->str, "else")) {
			// Sanity checking
			if (!p->children->next->next->next->next) {
				GIB_Parse_Error
					("'if' statement contains 'else' but no secondary program block or command.",
					 line->start);
				return line;
			}
			// On 'true' first block must jump past this
			// We will figure out jump target later
			line->next = GIB_Tree_New (TREE_T_JUMPPLUS);
			line = line->next;
			// Jump to else block on 'false'
			p->jump = line;
			// Is "else" followed by a subprogram?
			if (p->children->next->next->next->next->delim == '{') {
				// Move subprogram inline
				line->next = p->children->next->next->next->next->children;
				p->children->next->next->next->next->children = 0;
				while (line->next)
					line = line->next;
			} else {
				// Push rest of tokens into a new line; the enclosing loop
				// then examines that line, which handles "else if"
				line->next = GIB_Tree_New (TREE_T_CMD);
				line->next->children = p->children->next->next->next->next;
				p->children->next->next->next->next = 0;
				line = line->next;
			}
		} else {
			// Jump past block on 'false'
			p->jump = line;
			break; // Don't touch if statements in the sub program
		}
	}
	// Now we know exit point from if-else if chain, set our jumps
	while (start) {
		if (start->type == TREE_T_JUMPPLUS && !start->jump)
			start->jump = line;
		start = start->next;
	}
	// Nothing expanded from a line remains, exit now
	if (!line->children)
		return line;
	// If we have a while loop, handle that
	if (!strcmp (line->children->str, "while")) {
		// Sanity checks
		if (!line->children->next || !line->children->next->next) {
			GIB_Parse_Error ("Not enough arguments to 'while' statement.",
							 line->start);
			return line;
		} else if (!line->children->next->next->children
				   || line->children->next->next->delim != '{') {
			GIB_Parse_Error
				("Program block in 'while' statement not enclosed in braces or invalid.",
				 line->start);
			return line;
		} else if (line->flags & TREE_L_EMBED) {
			GIB_Parse_Error
				("'while' statements may not be used in embedded commands.",
				 line->start);
			return line;
		}
		// Set conditional flag
		line->type = TREE_T_COND;
		// Save our spot
		p = line;
		// Move subprogram inline
		line->next = line->children->next->next->children;
		line->children->next->next->children = 0;
		// Walk to the end of the subprogram, converting loop control
		// commands on the way.  The final line is examined as well, so a
		// "break" or "continue" that ends the body is not left for an
		// enclosing loop to claim.
		for (;; line = line->next) {
			if (!line->jump && line->children) {
				if (!strcmp (line->children->str, "continue")) {
					line->type = TREE_T_JUMP;
					line->jump = p;
				} else if (!strcmp (line->children->str, "break"))
					line->type = TREE_T_JUMPPLUS;
			}
			if (!line->next)
				break;
		}
		// Close the loop with a jump back to the condition
		line->next = GIB_Tree_New (TREE_T_JUMP);
		line->next->jump = p;
		line = line->next;
		// Mark jump point out of loop
		p->jump = line;
		// Set jumps out of loop for "break" commands;
		while (p) {
			if (p->type == TREE_T_JUMPPLUS && !p->jump)
				p->jump = line;
			p = p->next;
		}
	} else if (!strcmp (line->children->str, "for")) {
		gib_tree_t *tmp;

		// Sanity checks: for <var> in <args...> <block>
		if (!line->children->next || !line->children->next->next
			|| strcmp (line->children->next->next->str, "in")
			|| !line->children->next->next->next
			|| !line->children->next->next->next->next) {
			GIB_Parse_Error ("Malformed 'for' statement.", line->start);
			return line;
		} else if (line->flags & TREE_L_EMBED) {
			GIB_Parse_Error ("'for' statements may not be used in embedded commands.", line->start);
			return line;
		}
		// Find last token in line (contains program block)
		for (tmp = line->children->next->next->next->next; tmp->next;
			 tmp = tmp->next);
		// More sanity
		if (tmp->delim != '{' || !tmp->children) {
			GIB_Parse_Error
				("Program block in 'for' statement not enclosed in braces or invalid.",
				 line->start);
			return line;
		}
		// Add instruction to fetch next argument (this is the true loop start)
		line->next = GIB_Tree_New (TREE_T_FORNEXT);
		line = line->next;
		p = line;
		// Move subprogram inline
		line->next = tmp->children;
		tmp->children = 0;
		// Walk to the end of the subprogram, converting loop control
		// commands on the way (final line included, as for "while")
		for (;; line = line->next) {
			if (!line->jump && line->children) {
				if (!strcmp (line->children->str, "continue")) {
					line->type = TREE_T_JUMP;
					line->jump = p;
				} else if (!strcmp (line->children->str, "break"))
					line->type = TREE_T_JUMPPLUS;
			}
			if (!line->next)
				break;
		}
		// Close the loop with a jump back to the fetch instruction
		line->next = GIB_Tree_New (TREE_T_JUMP);
		line->next->jump = p;
		line = line->next;
		// Mark jump point out of loop
		p->jump = line;
		// Mark jump point out of loop for break command
		while (p) {
			if (p->type == TREE_T_JUMPPLUS && !p->jump)
				p->jump = line;
			p = p->next;
		}
	} else if (line->children->next
			   && !(line->children->next->flags & TREE_A_CONCAT)
			   && line->children->next->delim == ' '
			   && !strcmp (line->children->next->str, "=")) {
		// The error is recorded but the line is still marked as an
		// assignment; callers are expected to check gib_parse_error
		if (line->flags & TREE_L_EMBED)
			GIB_Parse_Error ("Assignment may not be used as an embedded command.", line->start);
		line->type = TREE_T_ASSIGN;
	}
	return line;
}

/*
	GIB_Parse_Lines

	Parses program into a linked list of command lines.  Each line is
	tokenized with GIB_Parse_Tokens, wrapped in a TREE_T_CMD node that
	also carries a copy of the line's text, preceded by any embedded
	command lines it requires, and then handed to
	GIB_Parse_Semantic_Preprocess.

	pofs is added to indices into program when recording positions.

	Returns the first line, or 0 if program contains no lines or a parse
	error occurred (in which case gib_parse_error is set and everything
	built so far is released).
*/
gib_tree_t *
GIB_Parse_Lines (const char *program, unsigned int pofs)
{
	gib_tree_t *head = 0, **link = &head;
	unsigned int pos = 0;

	for (;;) {
		gib_tree_t *args, *embedded, *cmd, *tail;
		unsigned int begin, len;
		char       *text;

		// Skip blank space and empty statements between lines
		while (program[pos] == ';' || isspace ((byte) program[pos]))
			pos++;
		if (!program[pos])
			return head;

		begin = pos;
		args = GIB_Parse_Tokens (program, &pos, pofs, &embedded);
		if (args) {
			// Build the node representing this line
			cmd = GIB_Tree_New (TREE_T_CMD);
			cmd->delim = '\n';
			len = pos - begin;
			text = calloc (len + 1, sizeof (char));
			memcpy (text, program + begin, len);
			cmd->str = text;
			cmd->start = begin + pofs;
			cmd->end = pos + pofs;
			cmd->children = args;

			if (!embedded) {
				*link = cmd;
			} else {
				// Embedded commands go into the chain ahead of the line
				// that uses them
				*link = embedded;
				for (tail = embedded; tail->next; tail = tail->next);
				tail->next = cmd;
			}
			// Preprocessing may expand the line; continue after its end
			link = &(GIB_Parse_Semantic_Preprocess (cmd))->next;
		}
		if (gib_parse_error) {
			if (head)
				GIB_Tree_Unref (&head);
			return 0;
		}
	}
}

/*
	GIB_Parse_Embedded

	Scans program for embedded commands (enclosed in backticks or written
	as $(...)) and for variable references introduced by '$' or '#'.

	program   text of a single argument
	pofs      offset added to indices into program when recording the
	          positions of embedded command lines and errors
	embedded  receives the chain of TREE_T_CMD lines (flagged
	          TREE_L_EMBED) built from the embedded commands; it is set
	          to 0 on entry

	Returns a list of TREE_T_META nodes, one per embedded element, in the
	order they appear.  Their start and end fields are indices into
	program (pofs is not added), with end one past the element.  Returns 0
	both when nothing embedded was found and when parsing failed;
	gib_parse_error, which is cleared on entry, tells the two apart.
*/
gib_tree_t *
GIB_Parse_Embedded (const char *program, unsigned int pofs, gib_tree_t ** embedded)
{
	unsigned int i, n, t;
	char        c, d, *str;
	gib_tree_t *lines = 0, **line = &lines, *cur, *tokens, *emb, *tmp, **embfirst;
	unsigned int start, end;

	gib_parse_error = false;
	embfirst = embedded;
	*embedded = 0;

	for (i = 0; program[i]; i++) {
		if (program[i] == '`' || (program[i] == '$' && program[i + 1] == '(')) {
			// Extract the embedded command
			// n is the index of the first character inside the delimiters;
			// after a successful match i is on the closing delimiter
			start = i;
			if (program[i] == '`') {
				n = i + 1;
				if ((c = GIB_Parse_Match_Backtick (program, &i)))
					goto ERROR;
			} else {
				n = ++i + 1;
				if ((c = GIB_Parse_Match_Paren (program, &i)))
					goto ERROR;
			}
			end = i + 1;
			// Construct the actual line to be executed
			cur = GIB_Tree_New (TREE_T_CMD);
			cur->flags |= TREE_L_EMBED;
			cur->delim = '`';
			str = calloc (i - n + 1, sizeof (char));
			memcpy (str, program + n, i - n);
			cur->str = str;
			cur->start = start + pofs;
			cur->end = end + pofs;
			// Matching succeeded; keep the ERROR path from reporting a
			// missing delimiter for the failures below
			c = 0;
			t = 0;
			// Only one line is tokenized; t is not examined afterwards.
			// NOTE(review): cur->str begins at program[n], but start + pofs
			// is passed as the offset, so positions recorded by this call
			// look 1 (backtick) or 2 ("$(") too small -- confirm.
			if (!(tokens = GIB_Parse_Tokens (cur->str, &t, start + pofs, &emb))) {
				GIB_Tree_Unref (&cur);
				goto ERROR;
			}
			cur->children = tokens;
			// Chain the commands collected so far after this one
			GIB_Parse_Semantic_Preprocess (cur)->next = *embedded;
			if (gib_parse_error) {
				// NOTE(review): emb is not released on this path, and cur
				// has just been linked to the chain in *embedded, which the
				// ERROR path unrefs again -- whether that is a problem
				// depends on GIB_Tree_Unref; confirm.
				GIB_Tree_Unref (&cur);
				goto ERROR;
			}
			// Did this have embedded commands of its own?
			if (emb) {
				// Link them in first
				for (tmp = emb; tmp->next; tmp = tmp->next);
				tmp->next = cur;
				*embedded = emb;
			} else
				*embedded = cur;
			// Create a representative child node for GIB_Process_Embedded to
			// use
			cur = GIB_Tree_New (TREE_T_META);
			cur->delim = '`';
			// Save start/end indices
			cur->start = start;
			cur->end = end;
			*line = cur;
			line = &cur->next;
			// Check for variable substitution
		} else if (program[i] == '$' || program[i] == '#') {
			// Extract variable name
			// n is the index of the first character of the name; end starts
			// at 1 for a braced name so that it ends up past the '}'
			start = i;
			end = 0;
			d = program[i];
			if (program[i + 1] == '{') {
				n = i + 2;
				end++;
			} else
				n = i + 1;
			if ((c = GIB_Parse_Match_Var (program, &i)))
				goto ERROR;
			// i is now on the closing '}' of a braced name, or one past the
			// end of an unbraced reference
			end += i;

			cur = GIB_Tree_New (TREE_T_META);
			cur->delim = d;
			str = calloc (i - n + 1, sizeof (char));
			memcpy (str, program + n, i - n);
			cur->str = str;
			// Can we use the name as is, or must processing be done at
			// runtime?
			if (strchr (str, '$') || strchr (str, '#'))
				cur->flags |= TREE_A_EMBED;
			// Save start/end indices
			cur->start = start;
			cur->end = end;
			*line = cur;
			line = &cur->next;
			// Don't skip anything important
			// For an unbraced reference i is already past it, so step back
			// to compensate for the loop increment
			if (program[n - 1] != '{')
				i--;
		}
	}
	return lines;
  ERROR:
	// A non-zero c means a delimiter matcher failed and no error has been
	// recorded for it yet
	if (c)
		GIB_Parse_Error (va ("Could not find match for '%c'.", c), i + pofs);
	if (lines)
		GIB_Tree_Unref (&lines);
	if (*embfirst)
		GIB_Tree_Unref (embfirst);
	return 0;
}