/* gib_parse.c GIB parser functions Copyright (C) 2002 Brian Koropoff Author: Brian Koropoff Date: #DATE# This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to: Free Software Foundation, Inc. 59 Temple Place - Suite 330 Boston, MA 02111-1307, USA */ #ifdef HAVE_CONFIG_H # include "config.h" #endif static __attribute__ ((unused)) const char rcsid[] = "$Id$"; #include #include #include #include "QF/sys.h" #include "QF/dstring.h" #include "QF/va.h" #include "QF/cmd.h" #include "QF/cbuf.h" #include "QF/gib_buffer.h" #include "QF/gib_process.h" #include "QF/gib_builtin.h" #include "QF/gib_function.h" #include "QF/gib_vars.h" #include "QF/gib_parse.h" /* GIB_Escaped Returns true if character i in str is escaped with a backslash (and the backslash is not itself escaped). */ inline qboolean GIB_Escaped (const char *str, int i) { int n, c; if (!i) return 0; for (n = i - 1, c = 0; n >= 0 && str[n] == '\\'; n--, c++); return c & 1; } /* GIB_Parse_Match_* These are the workhorses of the GIB parser. They iterate an index variable through a string until an appropriate matching character is found, calling themselves and their neighbors recursively to handle sections of string that they are uninterested in. FIXME: Make sure everything is calling everything else it might need to. Make appropriate functions intolerant of newlines. */ static char GIB_Parse_Match_Dquote (const char *str, unsigned int *i) { unsigned int n = *i; for ((*i)++; str[*i]; (*i)++) { if (str[*i] == '\n') break; else if (str[*i] == '\"' && !GIB_Escaped (str, *i)) return 0; } *i = n; return '\"'; } char GIB_Parse_Match_Brace (const char *str, unsigned int *i) { char c; unsigned int n = *i; for ((*i)++; str[*i]; (*i)++) { if (str[*i] == '\"') { if ((c = GIB_Parse_Match_Dquote (str, i))) return c; } else if (str[*i] == '{') { if ((c = GIB_Parse_Match_Brace (str, i))) return c; } else if (str[*i] == '}') return 0; } *i = n; return '{'; } char GIB_Parse_Match_Paren (const char *str, unsigned int *i) { char c; unsigned int n = *i; for ((*i)++; str[*i]; (*i)++) { if (str[*i] == '(') { if ((c = GIB_Parse_Match_Paren (str, i))) return c; } else if (str[*i] == '\"') { if ((c = GIB_Parse_Match_Dquote (str, i))) return c; } else if (str[*i] == ')') return 0; } *i = n; return '('; } char GIB_Parse_Match_Backtick (const char *str, unsigned int *i) { char c; unsigned int n = *i; for ((*i)++; str[*i]; (*i)++) { if (str[*i] == '`') return 0; else if (str[*i] == '\"') { // Skip over strings as usual if ((c = GIB_Parse_Match_Dquote (str, i))) return c; } } *i = n; return '`'; } char GIB_Parse_Match_Index (const char *str, unsigned int *i) { char c; unsigned int n = *i; for ((*i)++; str[*i]; (*i)++) { if (str[*i] == '[' && (c = GIB_Parse_Match_Index (str, i))) return c; else if (str[*i] == ']') return 0; } *i = n; return '['; } char GIB_Parse_Match_Var (const char *str, unsigned int *i) { char c; (*i)++; if (str[*i] == '{' && (c = GIB_Parse_Match_Brace (str, i))) return c; else { for (; isalnum ((byte) str[*i]) || str[*i] == '_'; (*i)++); if (str[*i] == '[') { if ((c = GIB_Parse_Match_Index (str, i))) return c; (*i)++; } } return 0; } qboolean gib_parse_error; unsigned int gib_parse_error_pos; const char *gib_parse_error_msg; static void GIB_Parse_Error (const char *msg, unsigned int pos) { gib_parse_error = true; gib_parse_error_msg = msg; gib_parse_error_pos = pos; } const char * GIB_Parse_ErrorMsg (void) { return gib_parse_error_msg; } unsigned int GIB_Parse_ErrorPos (void) { return gib_parse_error_pos; } // FIXME: Concatenation in stupid circumstances should generate errors static gib_tree_t * GIB_Parse_Tokens (const char *program, unsigned int *i, unsigned int pofs, gib_tree_t ** embedded) { char c, delim, *str; unsigned int tstart, start; gib_tree_t *nodes = 0, *cur, *new, *embs = 0, *tmp; gib_tree_t **node = &nodes; qboolean cat = false; gib_parse_error = false; while (1) { // Skip whitespace while (program[*i] != '\n' && isspace ((byte) program[*i])) (*i)++; // Check for concatenation, skip comma and any more whitespace if (program[*i] == ',') { cat = true; (*i)++; continue; } // New line/command? if (!program[*i] || program[*i] == '\n' || program[*i] == ';') break; // Save our start position start = *i; tstart = start + 1; delim = program[*i]; switch (delim) { case '{': if ((c = GIB_Parse_Match_Brace (program, i))) goto ERROR; break; case '\"': if ((c = GIB_Parse_Match_Dquote (program, i))) goto ERROR; break; case '(': if ((c = GIB_Parse_Match_Paren (program, i))) goto ERROR; break; default: // Find the end of a "normal" token delim = ' '; tstart = *i; for (; program[*i] && !isspace ((byte) program[*i]) && program[*i] != ',' && program[*i] != ';'; (*i)++) { if (program[*i] == '{') { if ((c = GIB_Parse_Match_Brace (program, i))) goto ERROR; } else if (program[*i] == '(') { if ((c = GIB_Parse_Match_Paren (program, i))) goto ERROR; } else if (program[*i] == '`') { if ((c = GIB_Parse_Match_Backtick (program, i))) goto ERROR; // Handle comments } else if (program[*i] == '/' && program[*i + 1] == '/') { for ((*i) += 2; program[*i] && program[*i] != '\n'; (*i)++); goto DONE; } } } c = 0; cur = *node = GIB_Tree_New (TREE_T_ARG); cur->start = start + pofs; cur->end = *i + pofs; cur->delim = delim; str = calloc (*i - tstart + 1, sizeof (char)); memcpy (str, program + tstart, *i - tstart); if (cur->delim == '{') { // Try to parse sub-program if (!(new = GIB_Parse_Lines (str, tstart + pofs))) goto ERROR; cur->children = new; // Check for embedded commands/variables } else if (cur->delim == ' ' || cur->delim == '(') { if (! (cur->children = GIB_Parse_Embedded (str, tstart + pofs, &new))) { // There could be no embedded elements, so check for a real // error if (gib_parse_error) goto ERROR; } else { // Link/set flags cur->flags |= TREE_A_EMBED; // Add any embedded commands to top of chain if (new) { for (tmp = new; tmp->next; tmp = tmp->next); tmp->next = embs; embs = new; } } // Check for array splitting // Concatenating this onto something else is non-sensical if (cur->delim == ' ' && (str[0] == '@' || str[0] == '%') && !cat) { cur->flags |= TREE_A_EXPAND; } // We can handle escape characters now } else if (cur->delim == '\"') GIB_Process_Escapes (str); cur->str = str; if (cat) { cur->flags |= TREE_A_CONCAT; cat = false; } // Nothing left to parse? if (!program[*i]) break; // On non-normal tokens, move past the delimeter if (cur->delim != ' ') (*i)++; node = &cur->next; } DONE: *embedded = embs; return nodes; ERROR: if (c) GIB_Parse_Error (va ("Could not find match for '%c'.", c), *i + pofs); if (nodes) GIB_Tree_Free_Recursive (nodes); return 0; } static gib_tree_t * GIB_Parse_Semantic_Preprocess (gib_tree_t * line) { gib_tree_t *p, *start = line; while (!strcmp (line->children->str, "if") || !strcmp (line->children->str, "ifnot")) { // Sanity checking if (!line->children->next || !line->children->next->next) { GIB_Parse_Error ("Not enough arguments to 'if' statement.", line->start); return line; } else if (!line->children->next->next->children || line->children->next->next->delim != '{') { GIB_Parse_Error ("First program block in 'if' statement not enclosed in braces or invalid.", line->start); return line; } else if (line->flags & TREE_L_EMBED) { GIB_Parse_Error ("'if' statements may not be used in embedded commands.", line->start); return line; } // Set as conditional line->type = TREE_T_COND; if (line->children->str[2]) line->flags |= TREE_L_NOT; // Save our spot p = line; // Move subprogram inline line->next = line->children->next->next->children; line->children->next->next->children = 0; // Find end of subprogram while (line->next) line = line->next; // Handle "else" if (p->children->next->next->next && !strcmp (p->children->next->next->next->str, "else")) { // Sanity checking if (!p->children->next->next->next->next) { GIB_Parse_Error ("'if' statement contains 'else' but no secondary program block or command.", line->start); return line; } // On 'true' first block must jump past this // We will figure out jump target later line->next = GIB_Tree_New (TREE_T_JUMPPLUS); line = line->next; // Jump to else block on 'false' p->jump = line; // Is "else" followed by a subprogram? if (p->children->next->next->next->next->delim == '{') { // Move subprogram inline line->next = p->children->next->next->next->next->children; p->children->next->next->next->next->children = 0; while (line->next) line = line->next; } else { // Push rest of tokens into a new line line->next = GIB_Tree_New (TREE_T_CMD); line->next->children = p->children->next->next->next->next; p->children->next->next->next->next = 0; line = line->next; } } else { // Jump past block on 'false' p->jump = line; break; // Don't touch if statements in the sub program } } // Now we know exit point from if-else if chain, set our jumps while (start) { if (start->type == TREE_T_JUMPPLUS && !start->jump) start->jump = line; start = start->next; } // Nothing expanded from a line remains, exit now if (!line->children) return line; // If we have a while loop, handle that if (!strcmp (line->children->str, "while")) { // Sanity checks if (!line->children->next || !line->children->next->next) { GIB_Parse_Error ("Not enough arguments to 'while' statement.", line->start); return line; } else if (!line->children->next->next->children || line->children->next->next->delim != '{') { GIB_Parse_Error ("Program block in 'while' statement not enclosed in braces or invalid.", line->start); return line; } else if (line->flags & TREE_L_EMBED) { GIB_Parse_Error ("'while' statements may not be used in embedded commands.", line->start); return line; } // Set conditional flag line->type = TREE_T_COND; // Save our spot p = line; // Move subprogram inline line->next = line->children->next->next->children; line->children->next->next->children = 0; // Find end of subprogram for (; line->next; line = line->next) if (!line->jump && line->children) { if (!strcmp (line->children->str, "continue")) { line->type = TREE_T_JUMP; line->jump = p; } else if (!strcmp (line->children->str, "break")) line->type = TREE_T_JUMPPLUS; } line->next = GIB_Tree_New (TREE_T_JUMP); line->next->jump = p; line = line->next; // Mark jump point out of loop p->jump = line; // Set jumps out of loop for "break" commands; while (p) { if (p->type == TREE_T_JUMPPLUS && !p->jump) p->jump = line; p = p->next; } } else if (!strcmp (line->children->str, "for")) { gib_tree_t *tmp; // Sanity checks if (!line->children->next || !line->children->next->next || strcmp (line->children->next->next->str, "in") || !line->children->next->next->next || !line->children->next->next->next->next) { GIB_Parse_Error ("Malformed 'for' statement.", line->start); return line; } // Find last token in line (contains program block) for (tmp = line->children->next->next->next->next; tmp->next; tmp = tmp->next); // More sanity if (tmp->delim != '{' || !tmp->children) { GIB_Parse_Error ("Program block in 'for' statement not enclosed in braces or invalid.", line->start); return line; } // Add instruction to fetch next argument (this is the true loop start) line->next = GIB_Tree_New (TREE_T_FORNEXT); line = line->next; p = line; // Move subprogram inline line->next = tmp->children; tmp->children = 0; // Find end of subprogram for (; line->next; line = line->next) if (!line->jump && line->children) { if (!strcmp (line->children->str, "continue")) { line->type = TREE_T_JUMP; line->jump = p; } else if (!strcmp (line->children->str, "break")) line->type = TREE_T_JUMPPLUS; } line->next = GIB_Tree_New (TREE_T_JUMP); line->next->jump = p; line = line->next; // Mark jump point out of loop p->jump = line; // Mark jump point out of loop for break command while (p) { if (p->type == TREE_T_JUMPPLUS && !p->jump) p->jump = line; p = p->next; } } else if (line->children->next && line->children->next->delim == ' ' && !strcmp (line->children->next->str, "=")) line->type = TREE_T_ASSIGN; return line; } gib_tree_t * GIB_Parse_Lines (const char *program, unsigned int pofs) { unsigned int i = 0, lstart; gib_tree_t *lines = 0, *cur, *tokens, **line = &lines, *embs; char *str; while (1) { while (isspace ((byte) program[i]) || program[i] == ';') i++; if (!program[i]) break; lstart = i; // If we parse something useful... if ((tokens = GIB_Parse_Tokens (program, &i, pofs, &embs))) { // Link it in cur = GIB_Tree_New (TREE_T_CMD); cur->delim = '\n'; str = calloc (i - lstart + 1, sizeof (char)); memcpy (str, program + lstart, i - lstart); cur->str = str; cur->start = lstart + pofs; cur->end = i + pofs; cur->children = tokens; // Line contains embedded commands? if (embs) { // Add them to chain before actual line *line = embs; for (; embs->next; embs = embs->next); embs->next = cur; } else *line = cur; // Do preprocessing line = &(GIB_Parse_Semantic_Preprocess (cur))->next; } if (gib_parse_error) goto ERROR; } return lines; ERROR: if (lines) GIB_Tree_Free_Recursive (lines); return 0; } gib_tree_t * GIB_Parse_Embedded (const char *program, unsigned int pofs, gib_tree_t ** embedded) { unsigned int i, n, t; char c, d, *str; gib_tree_t *lines = 0, **line = &lines, *cur, *tokens, *emb, *tmp; unsigned int start, end; gib_parse_error = false; *embedded = 0; for (i = 0; program[i]; i++) { if (program[i] == '`' || (program[i] == '$' && program[i + 1] == '(')) { // Extract the embedded command start = i; if (program[i] == '`') { n = i + 1; if ((c = GIB_Parse_Match_Backtick (program, &i))) goto ERROR; } else { n = ++i + 1; if ((c = GIB_Parse_Match_Paren (program, &i))) goto ERROR; } end = i + 1; // Construct the actual line to be executed cur = GIB_Tree_New (TREE_T_CMD); cur->flags |= TREE_L_EMBED; cur->delim = '`'; str = calloc (i - n + 1, sizeof (char)); memcpy (str, program + n, i - n); cur->str = str; cur->start = start + pofs; cur->end = end + pofs; c = 0; t = 0; if (! (tokens = GIB_Parse_Tokens (cur->str, &t, start + pofs, &emb))) goto ERROR; cur->children = tokens; GIB_Parse_Semantic_Preprocess (cur)->next = *embedded; if (gib_parse_error) goto ERROR; // Did this have embedded commands of it's own? if (emb) { // Link them in first for (tmp = emb; tmp->next; tmp = tmp->next); tmp->next = cur; *embedded = emb; } else *embedded = cur; // Create a representative child node for GIB_Process_Embedded to // use cur = GIB_Tree_New (TREE_T_META); cur->delim = '`'; // Save start/end indices cur->start = start; cur->end = end; *line = cur; line = &cur->next; // Check for variable substitution } else if (program[i] == '$' || program[i] == '#') { // Extract variable name start = i; end = 0; d = program[i]; if (program[i + 1] == '{') { n = i + 2; end++; } else n = i + 1; if ((c = GIB_Parse_Match_Var (program, &i))) goto ERROR; end += i; cur = GIB_Tree_New (TREE_T_META); cur->delim = d; str = calloc (i - n + 1, sizeof (char)); memcpy (str, program + n, i - n); cur->str = str; // Can we use the name as is, or must processing be done at // runtime? if (strchr (str, '$') || strchr (str, '#')) cur->flags |= TREE_A_EMBED; // Save start/end indices cur->start = start; cur->end = end; *line = cur; line = &cur->next; // Don't skip anything important if (program[n - 1] != '{') i--; } } return lines; ERROR: if (c) GIB_Parse_Error (va ("Could not find match for '%c'.", c), i + pofs); if (lines) GIB_Tree_Free_Recursive (lines); return 0; }