Lemon update 2015-09-07 18:23:37 on branch lemon-update

- For the Lemon-generated parser, add a new action type SHIFTREDUCE and use it to further compress the parser tables and improve parser performance. (user: drh)
This commit is contained in:
Randy Heit 2016-03-20 10:27:12 -05:00
parent 2b5cef0c17
commit 3d5867d29e
2 changed files with 173 additions and 106 deletions

View file

@ -220,7 +220,8 @@ enum e_action {
RRCONFLICT, /* Was a reduce, but part of a conflict */
SH_RESOLVED, /* Was a shift. Precedence resolved conflict */
RD_RESOLVED, /* Was reduce. Precedence resolved conflict */
NOT_USED /* Deleted by compression */
NOT_USED, /* Deleted by compression */
SHIFTREDUCE /* Shift first, then reduce */
};
/* Every shift or reduce operation is stored as one of the following */
@ -244,7 +245,9 @@ struct state {
struct action *ap; /* Array of actions for this state */
int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */
int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */
int iDflt; /* Default action is reduce by this rule */
int iDfltReduce; /* Default action is to REDUCE by this rule */
struct rule *pDfltReduce;/* The default REDUCE rule. */
int autoReduce; /* True if this is an auto-reduce state */
};
#define NO_OFFSET (-2147483647)
@ -264,6 +267,7 @@ struct lemon {
struct state **sorted; /* Table of states sorted by state number */
struct rule *rule; /* List of all rules */
int nstate; /* Number of states */
int nxstate; /* nstate with tail degenerate states removed */
int nrule; /* Number of rules */
int nsymbol; /* Number of terminal and nonterminal symbols */
int nterminal; /* Number of terminal symbols */
@ -388,7 +392,7 @@ struct action *ap2;
if( rc==0 ){
rc = (int)ap1->type - (int)ap2->type;
}
if( rc==0 && ap1->type==REDUCE ){
if( rc==0 && (ap1->type==REDUCE || ap1->type==SHIFTREDUCE) ){
rc = ap1->x.rp->index - ap2->x.rp->index;
}
if( rc==0 ){
@ -1547,7 +1551,7 @@ int main(int argc, char **argv)
stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal);
stats_line("total symbols", lem.nsymbol);
stats_line("rules", lem.nrule);
stats_line("states", lem.nstate);
stats_line("states", lem.nxstate);
stats_line("conflicts", lem.nconflict);
stats_line("action table entries", lem.nactiontab);
stats_line("total table size (bytes)", lem.tablesize);
@ -2953,24 +2957,25 @@ char *tag;
int PrintAction(
struct action *ap, /* The action to print */
FILE *fp, /* Print the action here */
int indent, /* Indent by this amount */
struct rule **apRule /* All rules by index */
int indent /* Indent by this amount */
){
int result = 1;
switch( ap->type ){
case SHIFT: {
struct state *stp = ap->x.stp;
fprintf(fp,"%*s shift %-7d",indent,ap->sp->name,stp->statenum);
if( stp->nTknAct==0 && stp->nNtAct==0 && apRule ){
fprintf(fp,"then reduce %d: ", stp->iDflt);
RulePrint(fp, apRule[stp->iDflt], -1);
}
break;
}
case REDUCE: {
struct rule *rp = ap->x.rp;
fprintf(fp,"%*s reduce %-7d",indent,ap->sp->name,rp->index);
if( apRule ) RulePrint(fp, apRule[rp->index], -1);
RulePrint(fp, rp, -1);
break;
}
case SHIFTREDUCE: {
struct rule *rp = ap->x.rp;
fprintf(fp,"%*s shift-reduce %-7d",indent,ap->sp->name,rp->index);
RulePrint(fp, rp, -1);
break;
}
case ACCEPT:
@ -3011,7 +3016,7 @@ int PrintAction(
return result;
}
/* Generate the "y.output" log file */
/* Generate the "*.out" log file */
void ReportOutput(struct lemon *lemp)
{
int i;
@ -3019,20 +3024,10 @@ void ReportOutput(struct lemon *lemp)
struct config *cfp;
struct action *ap;
FILE *fp;
struct rule **apRule;
apRule = malloc( sizeof(apRule[0])*(lemp->nrule+1) );
if( apRule ){
struct rule *x;
memset(apRule, 0, sizeof(apRule[0])*(lemp->nrule+1) );
for(x=lemp->rule; x; x=x->next){
assert( x->index>=0 && x->index<(lemp->nrule+1) );
apRule[x->index] = x;
}
}
fp = file_open(lemp,".out","wb");
if( fp==0 ) return;
for(i=0; i<lemp->nstate; i++){
for(i=0; i<lemp->nxstate; i++){
stp = lemp->sorted[i];
fprintf(fp,"State %d:\n",stp->statenum);
if( lemp->basisflag ) cfp=stp->bp;
@ -3057,7 +3052,7 @@ void ReportOutput(struct lemon *lemp)
}
fprintf(fp,"\n");
for(ap=stp->ap; ap; ap=ap->next){
if( PrintAction(ap,fp,30,apRule) ) fprintf(fp,"\n");
if( PrintAction(ap,fp,30) ) fprintf(fp,"\n");
}
fprintf(fp,"\n");
}
@ -3083,7 +3078,6 @@ void ReportOutput(struct lemon *lemp)
fprintf(fp, "\n");
}
fclose(fp);
free(apRule);
return;
}
@ -3146,9 +3140,10 @@ PRIVATE int compute_action(struct lemon *lemp, struct action *ap)
int act;
switch( ap->type ){
case SHIFT: act = ap->x.stp->statenum; break;
case REDUCE: act = ap->x.rp->index + lemp->nstate; break;
case ERROR: act = lemp->nstate + lemp->nrule; break;
case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break;
case SHIFTREDUCE: act = ap->x.rp->index + lemp->nstate; break;
case REDUCE: act = ap->x.rp->index + lemp->nstate+lemp->nrule; break;
case ERROR: act = lemp->nstate + lemp->nrule*2; break;
case ACCEPT: act = lemp->nstate + lemp->nrule*2 + 1; break;
default: act = -1; break;
}
return act;
@ -3777,7 +3772,7 @@ void ReportTable(
minimum_size_type(0, lemp->nsymbol+1, &szCodeType)); lineno++;
fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++;
fprintf(out,"#define YYACTIONTYPE %s\n",
minimum_size_type(0, lemp->nstate+lemp->nrule+5, &szActionType)); lineno++;
minimum_size_type(0,lemp->nstate+lemp->nrule*2+5,&szActionType)); lineno++;
if( lemp->wildcard ){
fprintf(out,"#define YYWILDCARD %d\n",
lemp->wildcard->index); lineno++;
@ -3814,8 +3809,6 @@ void ReportTable(
if( mhflag ){
fprintf(out,"#endif\n"); lineno++;
}
fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++;
fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++;
if( lemp->errsym->useCnt ){
fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++;
fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++;
@ -3823,27 +3816,17 @@ void ReportTable(
if( lemp->has_fallback ){
fprintf(out,"#define YYFALLBACK 1\n"); lineno++;
}
tplt_xfer(lemp->name,in,out,&lineno);
/* Generate the action table and its associates:
**
** yy_action[] A single table containing all actions.
** yy_lookahead[] A table containing the lookahead for each entry in
** yy_action. Used to detect hash collisions.
** yy_shift_ofst[] For each state, the offset into yy_action for
** shifting terminals.
** yy_reduce_ofst[] For each state, the offset into yy_action for
** shifting non-terminals after a reduce.
** yy_default[] Default action for each state.
/* Compute the action table, but do not output it yet. The action
** table must be computed before generating the YYNSTATE macro because
** we need to know how many states can be eliminated.
*/
/* Compute the actions on all states and count them up */
ax = (struct axset *) calloc(lemp->nstate*2 , sizeof(ax[0]));
ax = (struct axset *) calloc(lemp->nxstate*2 , sizeof(ax[0]));
if( ax==0 ){
fprintf(stderr,"malloc failed\n");
exit(1);
}
for(i=0; i<lemp->nstate; i++){
for(i=0; i<lemp->nxstate; i++){
stp = lemp->sorted[i];
ax[i*2].stp = stp;
ax[i*2].isTkn = 1;
@ -3854,15 +3837,12 @@ void ReportTable(
}
mxTknOfst = mnTknOfst = 0;
mxNtOfst = mnNtOfst = 0;
/* Compute the action table. In order to try to keep the size of the
** action table to a minimum, the heuristic of placing the largest action
** sets first is used.
*/
for(i=0; i<lemp->nstate*2; i++) ax[i].iOrder = i;
qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare);
/* In an effort to minimize the action table size, use the heuristic
** of placing the largest action sets first */
for(i=0; i<lemp->nxstate*2; i++) ax[i].iOrder = i;
qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare);
pActtab = acttab_alloc();
for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){
for(i=0; i<lemp->nxstate*2 && ax[i].nAction>0; i++){
stp = ax[i].stp;
if( ax[i].isTkn ){
for(ap=stp->ap; ap; ap=ap->next){
@ -3891,6 +3871,34 @@ void ReportTable(
}
free(ax);
/* Finish rendering the constants now that the action table has
** been computed */
fprintf(out,"#define YYNSTATE %d\n",lemp->nxstate); lineno++;
fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++;
fprintf(out,"#define YY_MAX_SHIFT %d\n",lemp->nstate-1); lineno++;
fprintf(out,"#define YY_MIN_SHIFTREDUCE %d\n",lemp->nstate); lineno++;
i = lemp->nstate + lemp->nrule;
fprintf(out,"#define YY_MAX_SHIFTREDUCE %d\n", i-1); lineno++;
fprintf(out,"#define YY_MIN_REDUCE %d\n", i); lineno++;
i = lemp->nstate + lemp->nrule*2;
fprintf(out,"#define YY_MAX_REDUCE %d\n", i-1); lineno++;
fprintf(out,"#define YY_ERROR_ACTION %d\n", i); lineno++;
fprintf(out,"#define YY_ACCEPT_ACTION %d\n", i+1); lineno++;
fprintf(out,"#define YY_NO_ACTION %d\n", i+2); lineno++;
tplt_xfer(lemp->name,in,out,&lineno);
/* Now output the action table and its associates:
**
** yy_action[] A single table containing all actions.
** yy_lookahead[] A table containing the lookahead for each entry in
** yy_action. Used to detect hash collisions.
** yy_shift_ofst[] For each state, the offset into yy_action for
** shifting terminals.
** yy_reduce_ofst[] For each state, the offset into yy_action for
** shifting non-terminals after a reduce.
** yy_default[] Default action for each state.
*/
/* Output the yy_action table */
lemp->nactiontab = n = acttab_size(pActtab);
lemp->tablesize += n*szActionType;
@ -3929,7 +3937,7 @@ void ReportTable(
/* Output the yy_shift_ofst[] table */
fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++;
n = lemp->nstate;
n = lemp->nxstate;
while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--;
fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++;
fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++;
@ -3955,7 +3963,7 @@ void ReportTable(
/* Output the yy_reduce_ofst[] table */
fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++;
n = lemp->nstate;
n = lemp->nxstate;
while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--;
fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++;
fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++;
@ -3981,12 +3989,12 @@ void ReportTable(
/* Output the default action table */
fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++;
n = lemp->nstate;
n = lemp->nxstate;
lemp->tablesize += n*szActionType;
for(i=j=0; i<n; i++){
stp = lemp->sorted[i];
if( j==0 ) fprintf(out," /* %5d */ ", i);
fprintf(out, " %4d,", stp->iDflt+n);
fprintf(out, " %4d,", stp->iDfltReduce+lemp->nstate+lemp->nrule);
if( j==9 || i==n-1 ){
fprintf(out, "\n"); lineno++;
j = 0;
@ -4221,7 +4229,7 @@ void CompressTables(struct lemon *lemp)
struct state *stp;
struct action *ap, *ap2;
struct rule *rp, *rp2, *rbest;
int nbest, n;
int nbest, n, nshift;
int i;
int usesWildcard;
@ -4269,6 +4277,32 @@ void CompressTables(struct lemon *lemp)
if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED;
}
stp->ap = Action_sort(stp->ap);
for(ap=stp->ap; ap; ap=ap->next){
if( ap->type==SHIFT ) break;
if( ap->type==REDUCE && ap->x.rp!=rbest ) break;
}
if( ap==0 ){
stp->autoReduce = 1;
stp->pDfltReduce = rbest;
}
}
/* Make a second pass over all states and actions. Convert
** every action that is a SHIFT to an autoReduce state into
** a SHIFTREDUCE action.
*/
for(i=0; i<lemp->nstate; i++){
stp = lemp->sorted[i];
for(ap=stp->ap; ap; ap=ap->next){
struct state *pNextState;
if( ap->type!=SHIFT ) continue;
pNextState = ap->x.stp;
if( pNextState->autoReduce && pNextState->pDfltReduce!=0 ){
ap->type = SHIFTREDUCE;
ap->x.rp = pNextState->pDfltReduce;
}
}
}
}
@ -4309,17 +4343,19 @@ void ResortStates(struct lemon *lemp)
for(i=0; i<lemp->nstate; i++){
stp = lemp->sorted[i];
stp->nTknAct = stp->nNtAct = 0;
stp->iDflt = lemp->nrule;
stp->iDfltReduce = lemp->nrule; /* Init dflt action to "syntax error" */
stp->iTknOfst = NO_OFFSET;
stp->iNtOfst = NO_OFFSET;
for(ap=stp->ap; ap; ap=ap->next){
if( compute_action(lemp,ap)>=0 ){
int iAction = compute_action(lemp,ap);
if( iAction>=0 ){
if( ap->sp->index<lemp->nterminal ){
stp->nTknAct++;
}else if( ap->sp->index<lemp->nsymbol ){
stp->nNtAct++;
}else{
stp->iDflt = compute_action(lemp, ap) - lemp->nstate;
assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp );
stp->iDfltReduce = iAction - lemp->nstate - lemp->nrule;
}
}
}
@ -4329,6 +4365,10 @@ void ResortStates(struct lemon *lemp)
for(i=0; i<lemp->nstate; i++){
lemp->sorted[i]->statenum = i;
}
lemp->nxstate = lemp->nstate;
while( lemp->nxstate>1 && lemp->sorted[lemp->nxstate-1]->autoReduce ){
lemp->nxstate--;
}
}

View file

@ -59,15 +59,19 @@
** ParseARG_PDECL A parameter declaration for the %extra_argument
** ParseARG_STORE Code to store %extra_argument into yypParser
** ParseARG_FETCH Code to extract %extra_argument from yypParser
** YYNSTATE the combined number of states.
** YYNRULE the number of rules in the grammar
** YYERRORSYMBOL is the code number of the error symbol. If not
** defined, then do no error processing.
** YYNSTATE the combined number of states.
** YYNRULE the number of rules in the grammar
** YY_MAX_SHIFT Maximum value for shift actions
** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
** YY_MIN_REDUCE Maximum value for reduce actions
** YY_ERROR_ACTION The yy_action[] code for syntax error
** YY_ACCEPT_ACTION The yy_action[] code for accept
** YY_NO_ACTION The yy_action[] code for no-op
*/
%%
#define YY_NO_ACTION (YYNSTATE+YYNRULE+2)
#define YY_ACCEPT_ACTION (YYNSTATE+YYNRULE+1)
#define YY_ERROR_ACTION (YYNSTATE+YYNRULE)
/* The yyzerominor constant is used to initialize instances of
** YYMINORTYPE objects to zero. */
@ -94,16 +98,20 @@ static const YYMINORTYPE yyzerominor = { 0 };
** Suppose the action integer is N. Then the action is determined as
** follows
**
** 0 <= N < YYNSTATE Shift N. That is, push the lookahead
** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead
** token onto the stack and goto state N.
**
** YYNSTATE <= N < YYNSTATE+YYNRULE Reduce by rule N-YYNSTATE.
** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then
** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE.
**
** N == YYNSTATE+YYNRULE A syntax error has occurred.
** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE
** and YY_MAX_REDUCE
** N == YY_ERROR_ACTION A syntax error has occurred.
**
** N == YYNSTATE+YYNRULE+1 The parser accepts its input.
** N == YY_ACCEPT_ACTION The parser accepts its input.
**
** N == YYNSTATE+YYNRULE+2 No such action. Denotes unused
** N == YY_NO_ACTION No such action. Denotes unused
** slots in the yy_action[] table.
**
** The action table is constructed as a single large table named yy_action[].
@ -497,9 +505,9 @@ static void yyStackOverflow(yyParser *yypParser, YYMINORTYPE *yypMinor){
}
/*
** Perform a shift action.
** Perform a shift action. Return the number of errors.
*/
static void yy_shift(
static int yy_shift(
yyParser *yypParser, /* The parser to be shifted */
int yyNewState, /* The new state to shift in */
int yyMajor, /* The major token to shift in */
@ -515,14 +523,14 @@ static void yy_shift(
#if YYSTACKDEPTH>0
if( yypParser->yyidx>=YYSTACKDEPTH ){
yyStackOverflow(yypParser, yypMinor);
return;
return 1;
}
#else
if( yypParser->yyidx>=yypParser->yystksz ){
yyGrowStack(yypParser);
if( yypParser->yyidx>=yypParser->yystksz ){
yyStackOverflow(yypParser, yypMinor);
return;
return 1;
}
}
#endif
@ -533,13 +541,18 @@ static void yy_shift(
#ifndef NDEBUG
if( yyTraceFILE && yypParser->yyidx>0 ){
int i;
if( yyNewState<YYNSTATE ){
fprintf(yyTraceFILE,"%sShift %d\n",yyTracePrompt,yyNewState);
fprintf(yyTraceFILE,"%sStack:",yyTracePrompt);
for(i=1; i<=yypParser->yyidx; i++)
fprintf(yyTraceFILE," (%d)%s",yypParser->yystack[i].stateno,yyTokenName[yypParser->yystack[i].major]);
fprintf(yyTraceFILE," %s",yyTokenName[yypParser->yystack[i].major]);
fprintf(yyTraceFILE,"\n");
}else{
fprintf(yyTraceFILE,"%sShift *\n",yyTracePrompt);
}
}
#endif
return 0;
}
/* The following table contains information about every rule that
@ -572,8 +585,9 @@ static void yy_reduce(
#ifndef NDEBUG
if( yyTraceFILE && yyruleno>=0
&& yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){
fprintf(yyTraceFILE, "%sReduce [%s].\n", yyTracePrompt,
yyRuleName[yyruleno]);
yysize = yyRuleInfo[yyruleno].nrhs;
fprintf(yyTraceFILE, "%sReduce [%s] -> state %d.\n", yyTracePrompt,
yyRuleName[yyruleno], yymsp[-yysize].stateno);
}
#endif /* NDEBUG */
@ -609,9 +623,8 @@ static void yy_reduce(
yysize = yyRuleInfo[yyruleno].nrhs;
yypParser->yyidx -= yysize;
yyact = yy_find_reduce_action(yymsp[-yysize].stateno,(YYCODETYPE)yygoto);
if( yyact < YYNSTATE ){
#ifdef NDEBUG
/* If we are not debugging and the reduce action popped at least
if( yyact < YY_MAX_SHIFTREDUCE ){
/* If the reduce action popped at least
** one element off the stack, then we can push the new element back
** onto the stack here, and skip the stack overflow test in yy_shift().
** That gives a significant speed improvement. */
@ -621,13 +634,19 @@ static void yy_reduce(
yymsp->stateno = (YYACTIONTYPE)yyact;
yymsp->major = (YYCODETYPE)yygoto;
yymsp->minor = yygotominor;
}else
#ifndef NDEBUG
if( yyTraceFILE ){
fprintf(yyTraceFILE,"%sShift %d\n",yyTracePrompt,yyact);
}
#endif
{
yy_shift(yypParser,yyact,yygoto,&yygotominor);
}else{
if( yy_shift(yypParser,yyact,yygoto,&yygotominor) ) yyact = 0;
}
if( yyact>=YY_MIN_SHIFTREDUCE ){
yy_reduce(yypParser, yyact - YY_MIN_SHIFTREDUCE);
}
}else{
assert( yyact == YYNSTATE + YYNRULE + 1 );
assert( yyact == YY_ACCEPT_ACTION );
yy_accept(yypParser);
}
}
@ -747,13 +766,16 @@ void Parse(
do{
yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor);
if( yyact<YYNSTATE ){
assert( !yyendofinput ); /* Impossible to shift the $ token */
yy_shift(yypParser,yyact,yymajor,&yyminorunion);
if( yyact <= YY_MAX_SHIFTREDUCE ){
if( yy_shift(yypParser,yyact,yymajor,&yyminorunion)==0 ){
yypParser->yyerrcnt--;
yymajor = YYNOCODE;
}else if( yyact < YYNSTATE + YYNRULE ){
yy_reduce(yypParser,yyact-YYNSTATE);
if( yyact > YY_MAX_SHIFT ){
yy_reduce(yypParser, yyact-YY_MIN_SHIFTREDUCE);
}
}
}else if( yyact <= YY_MAX_REDUCE ){
yy_reduce(yypParser,yyact-YY_MIN_REDUCE);
}else{
#ifdef YYERRORSYMBOL
int yymx;
@ -803,7 +825,7 @@ void Parse(
yymx != YYERRORSYMBOL &&
(yyact = yy_find_reduce_action(
yypParser->yystack[yypParser->yyidx].stateno,
YYERRORSYMBOL)) >= YYNSTATE
YYERRORSYMBOL)) >= YY_MIN_REDUCE
){
yy_pop_parser_stack(yypParser);
}
@ -853,5 +875,10 @@ void Parse(
#endif
}
}while( yymajor!=YYNOCODE && yypParser->yyidx>=0 );
#ifndef NDEBUG
if( yyTraceFILE ){
fprintf(yyTraceFILE,"%sReturn\n",yyTracePrompt);
}
#endif
return;
}