MSVC: Don't use YQ2_VLA() in loops

On MSVC, YQ2_VLA() is implemented with alloca() (or _malloca()), which
mustn't be called in loops: the memory is only freed when the function
returns, not when leaving the scope (or before the next loop iteration).

Instead, do one "dry-run" pass over the data to figure out the largest
size the array needs in any iteration, and then allocate it once, with
that size, before the actual loop.
This commit is contained in:
Daniel Gibson 2022-02-27 04:18:03 +01:00
parent 34a8c3833f
commit 4a02c790e9
3 changed files with 91 additions and 15 deletions

View file

@ -154,6 +154,29 @@ R_DrawAliasFrameLerp(entity_t *currententity, dmdl_t *paliashdr, float backlerp)
R_LerpVerts(currententity, paliashdr->num_xyz, v, ov, verts, lerp, move, frontv, backv);
#ifdef _MSC_VER // workaround for lack of VLAs (=> our workaround uses alloca() which is bad in loops)
int maxCount = 0;
const int* tmpOrder = order;
while (1)
{
int c = *tmpOrder++;
if (!c)
break;
if ( c < 0 )
c = -c;
if ( c > maxCount )
maxCount = c;
do {
tmpOrder += 3;
} while (--c);
}
YQ2_VLA( GLfloat, vtx, 3 * maxCount );
YQ2_VLA( GLfloat, tex, 2 * maxCount );
YQ2_VLA( GLfloat, clr, 4 * maxCount );
#endif
while (1)
{
/* get the vertex count and primitive type */
@ -176,9 +199,12 @@ R_DrawAliasFrameLerp(entity_t *currententity, dmdl_t *paliashdr, float backlerp)
}
total = count;
YQ2_VLA(GLfloat, vtx, 3*total); // FIXME: alloca in loop is bad!
#ifndef _MSC_VER // we have real VLAs, so it's safe to use one in this loop
YQ2_VLA(GLfloat, vtx, 3*total);
YQ2_VLA(GLfloat, tex, 2*total);
YQ2_VLA(GLfloat, clr, 4*total);
#endif
unsigned int index_vtx = 0;
unsigned int index_tex = 0;
unsigned int index_clr = 0;
@ -240,12 +266,12 @@ R_DrawAliasFrameLerp(entity_t *currententity, dmdl_t *paliashdr, float backlerp)
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
glDisableClientState(GL_COLOR_ARRAY);
YQ2_VLAFREE(vtx);
YQ2_VLAFREE(tex);
YQ2_VLAFREE(clr)
}
YQ2_VLAFREE( vtx );
YQ2_VLAFREE( tex );
YQ2_VLAFREE( clr )
if (currententity->flags &
(RF_SHELL_RED | RF_SHELL_GREEN | RF_SHELL_BLUE |
RF_SHELL_DOUBLE | RF_SHELL_HALF_DAM))
@ -276,6 +302,27 @@ R_DrawAliasShadow(entity_t *currententity, dmdl_t *paliashdr, int posenum)
glStencilOp(GL_KEEP, GL_KEEP, GL_INCR);
}
#ifdef _MSC_VER // workaround for lack of VLAs (=> our workaround uses alloca() which is bad in loops)
int maxCount = 0;
const int* tmpOrder = order;
while (1)
{
int c = *tmpOrder++;
if (!c)
break;
if (c < 0)
c = -c;
if (c > maxCount)
maxCount = c;
do {
tmpOrder += 3;
} while (--c);
}
YQ2_VLA(GLfloat, vtx, 3 * maxCount);
#endif
while (1)
{
/* get the vertex count and primitive type */
@ -298,8 +345,10 @@ R_DrawAliasShadow(entity_t *currententity, dmdl_t *paliashdr, int posenum)
}
total = count;
YQ2_VLA(GLfloat, vtx, 3*total); // FIXME: alloca in loop is bad!
#ifndef _MSC_VER // we have real VLAs, so it's safe to use one in this loop
YQ2_VLA(GLfloat, vtx, 3*total);
#endif
unsigned int index_vtx = 0;
do
@ -325,8 +374,8 @@ R_DrawAliasShadow(entity_t *currententity, dmdl_t *paliashdr, int posenum)
glDrawArrays( type, 0, total );
glDisableClientState( GL_VERTEX_ARRAY );
YQ2_VLAFREE(vtx);
}
YQ2_VLAFREE(vtx);
/* stencilbuffer shadows */
if (gl_state.stencil && gl1_stencilshadow->value)

View file

@ -205,14 +205,28 @@ R_DrawGLPolyChain(glpoly_t *p, float soffset, float toffset)
}
else
{
// workaround for lack of VLAs (=> our workaround uses alloca() which is bad in loops)
#ifdef _MSC_VER
int maxNumVerts = 0;
for (glpoly_t* tmp = p; tmp; tmp = tmp->chain)
{
if ( tmp->numverts > maxNumVerts )
maxNumVerts = tmp->numverts;
}
YQ2_VLA( GLfloat, tex, 2 * maxNumVerts );
#endif
for ( ; p != 0; p = p->chain)
{
float *v;
int j;
v = p->verts[0];
#ifndef _MSC_VER // we have real VLAs, so it's safe to use one in this loop
YQ2_VLA(GLfloat, tex, 2*p->numverts);
#endif
YQ2_VLA(GLfloat, tex, 2*p->numverts); // FIXME: alloca in loop is bad!
unsigned int index_tex = 0;
for ( j = 0; j < p->numverts; j++, v += VERTEXSIZE )
@ -232,9 +246,9 @@ R_DrawGLPolyChain(glpoly_t *p, float soffset, float toffset)
glDisableClientState( GL_VERTEX_ARRAY );
glDisableClientState( GL_TEXTURE_COORD_ARRAY );
YQ2_VLAFREE(tex);
}
YQ2_VLAFREE( tex );
}
}

View file

@ -297,11 +297,24 @@ R_EmitWaterPolys(msurface_t *fa)
scroll = 0;
}
// workaround for lack of VLAs (=> our workaround uses alloca() which is bad in loops)
#ifdef _MSC_VER
int maxNumVerts = 0;
for ( glpoly_t* tmp = fa->polys; tmp; tmp = tmp->next )
{
if (tmp->numverts > maxNumVerts)
maxNumVerts = tmp->numverts;
}
YQ2_VLA( GLfloat, tex, 2 * maxNumVerts );
#endif
for (bp = fa->polys; bp; bp = bp->next)
{
p = bp;
YQ2_VLA(GLfloat, tex, 2*p->numverts); // FIXME: alloca in loop is bad!
#ifndef _MSC_VER // we have real VLAs, so it's safe to use one in this loop
YQ2_VLA(GLfloat, tex, 2*p->numverts);
#endif
unsigned int index_tex = 0;
for ( i = 0, v = p->verts [ 0 ]; i < p->numverts; i++, v += VERTEXSIZE )
@ -328,9 +341,9 @@ R_EmitWaterPolys(msurface_t *fa)
glDisableClientState( GL_VERTEX_ARRAY );
glDisableClientState( GL_TEXTURE_COORD_ARRAY );
YQ2_VLAFREE(tex);
}
YQ2_VLAFREE( tex );
}
void