From 47d803b4fee6cdd58d13642a1029a1126234cb01 Mon Sep 17 00:00:00 2001
From: Daniel Gibson <metalcaedes@gmail.com>
Date: Sat, 28 Jan 2023 19:48:39 +0100
Subject: [PATCH] Use Mem_MallocA() in ModelOverlay.cpp, tr_light.cpp,
 tr_trisurf.cpp

Hopefully this covers all relevant cases, though plenty of _alloca()
and _alloca16() calls still remain throughout the code.
---
 neo/renderer/ModelOverlay.cpp | 15 +++++++++++--
 neo/renderer/tr_light.cpp     |  5 ++++-
 neo/renderer/tr_trisurf.cpp   | 40 +++++++++++++++++++++++------------
 3 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/neo/renderer/ModelOverlay.cpp b/neo/renderer/ModelOverlay.cpp
index 2868e24a..96bdca72 100644
--- a/neo/renderer/ModelOverlay.cpp
+++ b/neo/renderer/ModelOverlay.cpp
@@ -117,8 +117,16 @@ void idRenderModelOverlay::CreateOverlay( const idRenderModel *model, const idPl
 	}
 
 	// make temporary buffers for the building process
-	overlayVertex_t	*overlayVerts = (overlayVertex_t *)_alloca( maxVerts * sizeof( *overlayVerts ) );
-	glIndex_t *overlayIndexes = (glIndex_t *)_alloca16( maxIndexes * sizeof( *overlayIndexes ) );
+	// DG: using Mem_MallocA() instead of alloca() to avoid stack overflows with large models
+	size_t vertSize = maxVerts * sizeof( overlayVertex_t );
+	bool vertsOnStack;
+	overlayVertex_t	*overlayVerts = (overlayVertex_t *)Mem_MallocA( vertSize, vertsOnStack );
+
+	// Note: here we have two Mem_MallocA() calls, this relies on the stack being
+	//       big enough for two alloca(ID_MAX_ALLOCA_SIZE) calls!
+	size_t idxSize = maxIndexes * sizeof( glIndex_t );
+	bool idxOnStack;
+	glIndex_t *overlayIndexes = (glIndex_t *)Mem_MallocA( idxSize, idxOnStack );
 
 	// pull out the triangles we need from the base surfaces
 	for ( surfNum = 0; surfNum < model->NumBaseSurfaces(); surfNum++ ) {
@@ -224,6 +232,9 @@ void idRenderModelOverlay::CreateOverlay( const idRenderModel *model, const idPl
 			materials[i]->surfaces.RemoveIndex( 0 );
 		}
 	}
+
+	Mem_FreeA(overlayVerts, vertsOnStack);
+	Mem_FreeA(overlayIndexes, idxOnStack);
 }
 
 /*
diff --git a/neo/renderer/tr_light.cpp b/neo/renderer/tr_light.cpp
index 523a08a2..6b2db540 100644
--- a/neo/renderer/tr_light.cpp
+++ b/neo/renderer/tr_light.cpp
@@ -164,7 +164,9 @@ void R_CreateVertexProgramShadowCache( srfTriangles_t *tri ) {
 		return;
 	}
 
-	shadowCache_t *temp = (shadowCache_t *)_alloca16( tri->numVerts * 2 * sizeof( shadowCache_t ) );
+	// DG: use Mem_MallocA() instead of _alloca16() to avoid stack overflows with big models
+	bool tempOnStack;
+	shadowCache_t *temp = (shadowCache_t *)Mem_MallocA( tri->numVerts * 2 * sizeof( shadowCache_t ), tempOnStack );
 
 #if 1
 
@@ -189,6 +191,7 @@ void R_CreateVertexProgramShadowCache( srfTriangles_t *tri ) {
 #endif
 
 	vertexCache.Alloc( temp, tri->numVerts * 2 * sizeof( shadowCache_t ), &tri->shadowCache );
+	Mem_FreeA( temp, tempOnStack );
 }
 
 /*
diff --git a/neo/renderer/tr_trisurf.cpp b/neo/renderer/tr_trisurf.cpp
index 2080c094..28726081 100644
--- a/neo/renderer/tr_trisurf.cpp
+++ b/neo/renderer/tr_trisurf.cpp
@@ -790,7 +790,9 @@ R_CreateDupVerts
 void R_CreateDupVerts( srfTriangles_t *tri ) {
 	int i;
 
-	int *remap = (int *) _alloca16( tri->numVerts * sizeof( remap[0] ) );
+	// DG: use Mem_MallocA() instead of _alloca16() to avoid stack overflows with big models
+	bool remapOnStack;
+	int *remap = (int *)Mem_MallocA( tri->numVerts * sizeof( remap[0] ), remapOnStack );
 
 	// initialize vertex remap in case there are unused verts
 	for ( i = 0; i < tri->numVerts; i++ ) {
@@ -803,7 +805,9 @@ void R_CreateDupVerts( srfTriangles_t *tri ) {
 	}
 
 	// create duplicate vertex index based on the vertex remap
-	int * tempDupVerts = (int *) _alloca16( tri->numVerts * 2 * sizeof( tempDupVerts[0] ) );
+	bool tempDupVertsOnStack;
+	int *tempDupVerts = (int *)Mem_MallocA( tri->numVerts * 2 * sizeof( tempDupVerts[0] ), tempDupVertsOnStack );
+
 	tri->numDupVerts = 0;
 	for ( i = 0; i < tri->numVerts; i++ ) {
 		if ( remap[i] != i ) {
@@ -819,6 +823,9 @@ void R_CreateDupVerts( srfTriangles_t *tri ) {
 	} else {
 		tri->dupVerts = NULL;
 	}
+
+	Mem_FreeA( remap, remapOnStack );
+	Mem_FreeA( tempDupVerts, tempDupVertsOnStack );
 }
 
 /*
@@ -1279,7 +1286,10 @@ static void	R_DuplicateMirroredVertexes( srfTriangles_t *tri ) {
 	int				totalVerts;
 	int				numMirror;
 
-	tverts = (tangentVert_t *)_alloca16( tri->numVerts * sizeof( *tverts ) );
+	// DG: use Mem_MallocA() instead of _alloca16() to avoid stack overflows with big models
+	bool tvertsOnStack;
+	tverts = (tangentVert_t *)Mem_MallocA( tri->numVerts * sizeof( *tverts ), tvertsOnStack );
+
 	memset( tverts, 0, tri->numVerts * sizeof( *tverts ) );
 
 	// determine texture polarity of each surface
@@ -1309,6 +1319,7 @@ static void	R_DuplicateMirroredVertexes( srfTriangles_t *tri ) {
 	// now create the new list
 	if ( totalVerts == tri->numVerts ) {
 		tri->mirroredVerts = NULL;
+		Mem_FreeA( tverts, tvertsOnStack );
 		return;
 	}
 
@@ -1344,6 +1355,8 @@ static void	R_DuplicateMirroredVertexes( srfTriangles_t *tri ) {
 	}
 
 	tri->numVerts = totalVerts;
+
+	Mem_FreeA( tverts, tvertsOnStack );
 }
 
 /*
@@ -1386,14 +1399,10 @@ void R_DeriveTangentsWithoutNormals( srfTriangles_t *tri ) {
 	faceTangents_t	*ft;
 	idDrawVert		*vert;
 
-	// DG: windows only has a 1MB stack and it could happen that we try to allocate >1MB here
-	//     (in lost mission mod, game/le_hell map), causing a stack overflow
-	//     to prevent that, use heap allocation if it's >600KB
+	// DG: use Mem_MallocA() instead of _alloca16() to avoid stack overflows with big models
 	size_t allocaSize = sizeof(faceTangents[0]) * tri->numIndexes/3;
-	if(allocaSize < 600000)
-		faceTangents = (faceTangents_t *)_alloca16( allocaSize );
-	else
-		faceTangents = (faceTangents_t *)Mem_Alloc16( allocaSize );
+	bool faceTangentsOnStack;
+	faceTangents = (faceTangents_t *)Mem_MallocA( allocaSize, faceTangentsOnStack );
 
 	R_DeriveFaceTangents( tri, faceTangents );
 
@@ -1451,8 +1460,7 @@ void R_DeriveTangentsWithoutNormals( srfTriangles_t *tri ) {
 
 	tri->tangentsCalculated = true;
 
-	if(allocaSize >= 600000)
-		Mem_Free16( faceTangents );
+	Mem_FreeA( faceTangents, faceTangentsOnStack );
 }
 
 static ID_INLINE void VectorNormalizeFast2( const idVec3 &v, idVec3 &out) {
@@ -1685,8 +1693,12 @@ void R_DeriveTangents( srfTriangles_t *tri, bool allocFacePlanes ) {
 
 #if 1
 
+	// ok, this is also true if they're not on the stack but from tri->facePlanes
+	// (either way, Mem_FreeA() mustn't free() them)
+	bool planesOnStack = true;
 	if ( !planes ) {
-		planes = (idPlane *)_alloca16( ( tri->numIndexes / 3 ) * sizeof( planes[0] ) );
+		// DG: use Mem_MallocA() instead of _alloca16() to avoid stack overflows with big models
+		planes = (idPlane *)Mem_MallocA( ( tri->numIndexes / 3 ) * sizeof( planes[0] ), planesOnStack );
 	}
 
 	SIMDProcessor->DeriveTangents( planes, tri->verts, tri->numVerts, tri->indexes, tri->numIndexes );
@@ -1846,6 +1858,8 @@ void R_DeriveTangents( srfTriangles_t *tri, bool allocFacePlanes ) {
 
 	tri->tangentsCalculated = true;
 	tri->facePlanesCalculated = true;
+
+	Mem_FreeA( planes, planesOnStack );
 }
 
 /*