Lemon update 2014-01-11 03:06:18 on branch lemon-updates

- Add the new "%token_class" directive for defining symbolic names that stand any one of a collection of tokens. (user: drh)
This commit is contained in:
Randy Heit 2016-03-20 10:08:44 -05:00
parent 249aed455b
commit e59ef08cc8
1 changed files with 71 additions and 19 deletions

View File

@ -1463,12 +1463,15 @@ int main(int argc, char **argv)
} }
/* Count and index the symbols of the grammar */ /* Count and index the symbols of the grammar */
lem.nsymbol = Symbol_count();
Symbol_new("{default}"); Symbol_new("{default}");
lem.nsymbol = Symbol_count();
lem.symbols = Symbol_arrayof(); lem.symbols = Symbol_arrayof();
for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i;
qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), Symbolcmpp); qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp);
for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i;
while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; }
assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 );
lem.nsymbol = i - 1;
for(i=1; isupper(lem.symbols[i]->name[0]); i++); for(i=1; isupper(lem.symbols[i]->name[0]); i++);
lem.nterminal = i; lem.nterminal = i;
@ -1947,7 +1950,9 @@ enum e_state {
WAITING_FOR_DESTRUCTOR_SYMBOL, WAITING_FOR_DESTRUCTOR_SYMBOL,
WAITING_FOR_DATATYPE_SYMBOL, WAITING_FOR_DATATYPE_SYMBOL,
WAITING_FOR_FALLBACK_ID, WAITING_FOR_FALLBACK_ID,
WAITING_FOR_WILDCARD_ID WAITING_FOR_WILDCARD_ID,
WAITING_FOR_CLASS_ID,
WAITING_FOR_CLASS_TOKEN
}; };
struct pstate { struct pstate {
char *filename; /* Name of the input file */ char *filename; /* Name of the input file */
@ -1957,6 +1962,7 @@ struct pstate {
struct lemon *gp; /* Global state vector */ struct lemon *gp; /* Global state vector */
enum e_state state; /* The state of the parser */ enum e_state state; /* The state of the parser */
struct symbol *fallback; /* The fallback token */ struct symbol *fallback; /* The fallback token */
struct symbol *tkclass; /* Token class symbol */
struct symbol *lhs; /* Left-hand side of current rule */ struct symbol *lhs; /* Left-hand side of current rule */
const char *lhsalias; /* Alias for the LHS */ const char *lhsalias; /* Alias for the LHS */
int nrhs; /* Number of right-hand side symbols seen */ int nrhs; /* Number of right-hand side symbols seen */
@ -2260,6 +2266,8 @@ to follow the previous rule.");
psp->state = WAITING_FOR_FALLBACK_ID; psp->state = WAITING_FOR_FALLBACK_ID;
}else if( strcmp(x,"wildcard")==0 ){ }else if( strcmp(x,"wildcard")==0 ){
psp->state = WAITING_FOR_WILDCARD_ID; psp->state = WAITING_FOR_WILDCARD_ID;
}else if( strcmp(x,"token_class")==0 ){
psp->state = WAITING_FOR_CLASS_ID;
}else{ }else{
ErrorMsg(psp->filename,psp->tokenlineno, ErrorMsg(psp->filename,psp->tokenlineno,
"Unknown declaration keyword: \"%%%s\".",x); "Unknown declaration keyword: \"%%%s\".",x);
@ -2428,6 +2436,40 @@ to follow the previous rule.");
} }
} }
break; break;
case WAITING_FOR_CLASS_ID:
if( !islower(x[0]) ){
ErrorMsg(psp->filename, psp->tokenlineno,
"%%token_class must be followed by an identifier: ", x);
psp->errorcnt++;
psp->state = RESYNC_AFTER_DECL_ERROR;
}else if( Symbol_find(x) ){
ErrorMsg(psp->filename, psp->tokenlineno,
"Symbol \"%s\" already used", x);
psp->errorcnt++;
psp->state = RESYNC_AFTER_DECL_ERROR;
}else{
psp->tkclass = Symbol_new(x);
psp->tkclass->type = MULTITERMINAL;
psp->state = WAITING_FOR_CLASS_TOKEN;
}
break;
case WAITING_FOR_CLASS_TOKEN:
if( x[0]=='.' ){
psp->state = WAITING_FOR_DECL_OR_RULE;
}else if( isupper(x[0]) || ((x[0]=='|' || x[0]=='/') && isupper(x[1])) ){
struct symbol *msp = psp->tkclass;
msp->nsubsym++;
msp->subsym = (struct symbol **) realloc(msp->subsym,
sizeof(struct symbol*)*msp->nsubsym);
if( !isupper(x[0]) ) x++;
msp->subsym[msp->nsubsym-1] = Symbol_new(x);
}else{
ErrorMsg(psp->filename, psp->tokenlineno,
"%%token_class argument \"%s\" should be a token", x);
psp->errorcnt++;
psp->state = RESYNC_AFTER_DECL_ERROR;
}
break;
case RESYNC_AFTER_RULE_ERROR: case RESYNC_AFTER_RULE_ERROR:
/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
** break; */ ** break; */
@ -2800,11 +2842,13 @@ void Reprint(struct lemon *lemp)
printf(" ::="); printf(" ::=");
for(i=0; i<rp->nrhs; i++){ for(i=0; i<rp->nrhs; i++){
sp = rp->rhs[i]; sp = rp->rhs[i];
printf(" %s", sp->name);
if( sp->type==MULTITERMINAL ){ if( sp->type==MULTITERMINAL ){
printf(" %s", sp->subsym[0]->name);
for(j=1; j<sp->nsubsym; j++){ for(j=1; j<sp->nsubsym; j++){
printf("|%s", sp->subsym[j]->name); printf("|%s", sp->subsym[j]->name);
} }
}else{
printf(" %s", sp->name);
} }
/* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
} }
@ -2826,11 +2870,13 @@ void ConfigPrint(FILE *fp, struct config *cfp)
if( i==cfp->dot ) fprintf(fp," *"); if( i==cfp->dot ) fprintf(fp," *");
if( i==rp->nrhs ) break; if( i==rp->nrhs ) break;
sp = rp->rhs[i]; sp = rp->rhs[i];
fprintf(fp," %s", sp->name);
if( sp->type==MULTITERMINAL ){ if( sp->type==MULTITERMINAL ){
fprintf(fp," %s", sp->subsym[0]->name);
for(j=1; j<sp->nsubsym; j++){ for(j=1; j<sp->nsubsym; j++){
fprintf(fp,"|%s",sp->subsym[j]->name); fprintf(fp,"|%s",sp->subsym[j]->name);
} }
}else{
fprintf(fp," %s", sp->name);
} }
} }
} }
@ -3587,9 +3633,11 @@ static void writeRuleText(FILE *out, struct rule *rp){
fprintf(out,"%s ::=", rp->lhs->name); fprintf(out,"%s ::=", rp->lhs->name);
for(j=0; j<rp->nrhs; j++){ for(j=0; j<rp->nrhs; j++){
struct symbol *sp = rp->rhs[j]; struct symbol *sp = rp->rhs[j];
if( sp->type!=MULTITERMINAL ){
fprintf(out," %s", sp->name); fprintf(out," %s", sp->name);
if( sp->type==MULTITERMINAL ){ }else{
int k; int k;
fprintf(out," %s", sp->subsym[0]->name);
for(k=1; k<sp->nsubsym; k++){ for(k=1; k<sp->nsubsym; k++){
fprintf(out,"|%s",sp->subsym[k]->name); fprintf(out,"|%s",sp->subsym[k]->name);
} }
@ -4058,7 +4106,8 @@ void ReportHeader(struct lemon *lemp)
if( in ){ if( in ){
int nextChar; int nextChar;
for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){ for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){
sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); sprintf(pattern,"#define %s%-30s %2d\n",
prefix,lemp->symbols[i]->name,i);
if( strcmp(line,pattern) ) break; if( strcmp(line,pattern) ) break;
} }
nextChar = fgetc(in); nextChar = fgetc(in);
@ -4072,7 +4121,7 @@ void ReportHeader(struct lemon *lemp)
out = file_open(lemp,".h","wb"); out = file_open(lemp,".h","wb");
if( out ){ if( out ){
for(i=1; i<lemp->nterminal; i++){ for(i=1; i<lemp->nterminal; i++){
fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i);
} }
fclose(out); fclose(out);
} }
@ -4442,11 +4491,15 @@ struct symbol *Symbol_new(const char *x)
return sp; return sp;
} }
/* Compare two symbols for working purposes /* Compare two symbols for sorting purposes. Return negative,
** zero, or positive if a is less then, equal to, or greater
** than b.
** **
** Symbols that begin with upper case letters (terminals or tokens) ** Symbols that begin with upper case letters (terminals or tokens)
** must sort before symbols that begin with lower case letters ** must sort before symbols that begin with lower case letters
** (non-terminals). Other than that, the order does not matter. ** (non-terminals). And MULTITERMINAL symbols (created using the
** %token_class directive) must sort at the very end. Other than
** that, the order does not matter.
** **
** We find experimentally that leaving the symbols in their original ** We find experimentally that leaving the symbols in their original
** order (the order they appeared in the grammar file) gives the ** order (the order they appeared in the grammar file) gives the
@ -4454,12 +4507,11 @@ struct symbol *Symbol_new(const char *x)
*/ */
int Symbolcmpp(const void *_a, const void *_b) int Symbolcmpp(const void *_a, const void *_b)
{ {
const struct symbol **a = (const struct symbol **) _a; const struct symbol *a = *(const struct symbol **) _a;
const struct symbol **b = (const struct symbol **) _b; const struct symbol *b = *(const struct symbol **) _b;
int i1 = (**a).index + 10000000*((**a).name[0]>'Z'); int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1;
int i2 = (**b).index + 10000000*((**b).name[0]>'Z'); int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1;
assert( i1!=i2 || strcmp((**a).name,(**b).name)==0 ); return i1==i2 ? a->index - b->index : i1 - i2;
return i1-i2;
} }
/* There is one instance of the following structure for each /* There is one instance of the following structure for each