diff --git a/tools/lemon/lemon.c b/tools/lemon/lemon.c index ae10a64479..4a0eb488e3 100644 --- a/tools/lemon/lemon.c +++ b/tools/lemon/lemon.c @@ -1463,12 +1463,15 @@ int main(int argc, char **argv) } /* Count and index the symbols of the grammar */ - lem.nsymbol = Symbol_count(); Symbol_new("{default}"); + lem.nsymbol = Symbol_count(); lem.symbols = Symbol_arrayof(); - for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; - qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), Symbolcmpp); - for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; + for(i=0; iindex = i; + qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); + for(i=0; iindex = i; + while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } + assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); + lem.nsymbol = i - 1; for(i=1; isupper(lem.symbols[i]->name[0]); i++); lem.nterminal = i; @@ -1947,7 +1950,9 @@ enum e_state { WAITING_FOR_DESTRUCTOR_SYMBOL, WAITING_FOR_DATATYPE_SYMBOL, WAITING_FOR_FALLBACK_ID, - WAITING_FOR_WILDCARD_ID + WAITING_FOR_WILDCARD_ID, + WAITING_FOR_CLASS_ID, + WAITING_FOR_CLASS_TOKEN }; struct pstate { char *filename; /* Name of the input file */ @@ -1957,6 +1962,7 @@ struct pstate { struct lemon *gp; /* Global state vector */ enum e_state state; /* The state of the parser */ struct symbol *fallback; /* The fallback token */ + struct symbol *tkclass; /* Token class symbol */ struct symbol *lhs; /* Left-hand side of current rule */ const char *lhsalias; /* Alias for the LHS */ int nrhs; /* Number of right-hand side symbols seen */ @@ -2260,6 +2266,8 @@ to follow the previous rule."); psp->state = WAITING_FOR_FALLBACK_ID; }else if( strcmp(x,"wildcard")==0 ){ psp->state = WAITING_FOR_WILDCARD_ID; + }else if( strcmp(x,"token_class")==0 ){ + psp->state = WAITING_FOR_CLASS_ID; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Unknown declaration keyword: \"%%%s\".",x); @@ -2428,6 +2436,40 @@ to follow the previous rule."); } } break; + case WAITING_FOR_CLASS_ID: + if( !islower(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class must be followed by an identifier: ", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else if( Symbol_find(x) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "Symbol \"%s\" already used", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + psp->tkclass = Symbol_new(x); + psp->tkclass->type = MULTITERMINAL; + psp->state = WAITING_FOR_CLASS_TOKEN; + } + break; + case WAITING_FOR_CLASS_TOKEN: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( isupper(x[0]) || ((x[0]=='|' || x[0]=='/') && isupper(x[1])) ){ + struct symbol *msp = psp->tkclass; + msp->nsubsym++; + msp->subsym = (struct symbol **) realloc(msp->subsym, + sizeof(struct symbol*)*msp->nsubsym); + if( !isupper(x[0]) ) x++; + msp->subsym[msp->nsubsym-1] = Symbol_new(x); + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class argument \"%s\" should be a token", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; case RESYNC_AFTER_RULE_ERROR: /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; ** break; */ @@ -2800,11 +2842,13 @@ void Reprint(struct lemon *lemp) printf(" ::="); for(i=0; inrhs; i++){ sp = rp->rhs[i]; - printf(" %s", sp->name); if( sp->type==MULTITERMINAL ){ + printf(" %s", sp->subsym[0]->name); for(j=1; jnsubsym; j++){ printf("|%s", sp->subsym[j]->name); } + }else{ + printf(" %s", sp->name); } /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ } @@ -2826,11 +2870,13 @@ void ConfigPrint(FILE *fp, struct config *cfp) if( i==cfp->dot ) fprintf(fp," *"); if( i==rp->nrhs ) break; sp = rp->rhs[i]; - fprintf(fp," %s", sp->name); if( sp->type==MULTITERMINAL ){ + fprintf(fp," %s", sp->subsym[0]->name); for(j=1; jnsubsym; j++){ fprintf(fp,"|%s",sp->subsym[j]->name); } + }else{ + fprintf(fp," %s", sp->name); } } } @@ -3587,9 +3633,11 @@ static void writeRuleText(FILE *out, struct rule *rp){ fprintf(out,"%s ::=", rp->lhs->name); for(j=0; jnrhs; j++){ struct symbol *sp = rp->rhs[j]; - fprintf(out," %s", sp->name); - if( sp->type==MULTITERMINAL ){ + if( sp->type!=MULTITERMINAL ){ + fprintf(out," %s", sp->name); + }else{ int k; + fprintf(out," %s", sp->subsym[0]->name); for(k=1; knsubsym; k++){ fprintf(out,"|%s",sp->subsym[k]->name); } @@ -4058,7 +4106,8 @@ void ReportHeader(struct lemon *lemp) if( in ){ int nextChar; for(i=1; interminal && fgets(line,LINESIZE,in); i++){ - sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); + sprintf(pattern,"#define %s%-30s %2d\n", + prefix,lemp->symbols[i]->name,i); if( strcmp(line,pattern) ) break; } nextChar = fgetc(in); @@ -4072,7 +4121,7 @@ void ReportHeader(struct lemon *lemp) out = file_open(lemp,".h","wb"); if( out ){ for(i=1; interminal; i++){ - fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); + fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i); } fclose(out); } @@ -4442,11 +4491,15 @@ struct symbol *Symbol_new(const char *x) return sp; } -/* Compare two symbols for working purposes +/* Compare two symbols for sorting purposes. Return negative, +** zero, or positive if a is less then, equal to, or greater +** than b. ** ** Symbols that begin with upper case letters (terminals or tokens) ** must sort before symbols that begin with lower case letters -** (non-terminals). Other than that, the order does not matter. +** (non-terminals). And MULTITERMINAL symbols (created using the +** %token_class directive) must sort at the very end. Other than +** that, the order does not matter. ** ** We find experimentally that leaving the symbols in their original ** order (the order they appeared in the grammar file) gives the @@ -4454,12 +4507,11 @@ struct symbol *Symbol_new(const char *x) */ int Symbolcmpp(const void *_a, const void *_b) { - const struct symbol **a = (const struct symbol **) _a; - const struct symbol **b = (const struct symbol **) _b; - int i1 = (**a).index + 10000000*((**a).name[0]>'Z'); - int i2 = (**b).index + 10000000*((**b).name[0]>'Z'); - assert( i1!=i2 || strcmp((**a).name,(**b).name)==0 ); - return i1-i2; + const struct symbol *a = *(const struct symbol **) _a; + const struct symbol *b = *(const struct symbol **) _b; + int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1; + int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1; + return i1==i2 ? a->index - b->index : i1 - i2; } /* There is one instance of the following structure for each