diff --git a/engine/client/m_options.c b/engine/client/m_options.c
index a074014c1..4f3d0aa56 100644
--- a/engine/client/m_options.c
+++ b/engine/client/m_options.c
@@ -1568,12 +1568,16 @@ void M_Menu_Render_f (void)
 		"1",
 		NULL
 	};
+
 	static const char *logcenteropts[] = {"Off", "Singleplayer", "Always", NULL};
 	static const char *logcentervalues[] = {"0", "1", "2", NULL};
 
 	static const char *cshiftopts[] = {"Off", "Fullscreen", "Edges", NULL};
 	static const char *cshiftvalues[] = {"0", "1", "2", NULL};
 
+	static const char *scenecacheopts[] = {"Auto", "Force Off", "Force On", NULL};
+	static const char *scenecachevalues[] = {"", "0", "1", NULL};
+
 	emenu_t *menu;
 	extern cvar_t r_novis, cl_item_bobbing, r_waterwarp, r_nolerp, r_noframegrouplerp, r_fastsky, gl_nocolors, gl_lerpimages, r_wateralpha, r_drawviewmodel, gl_cshiftenabled, r_hdr_irisadaptation, scr_logcenterprint, r_fxaa, r_graphics;
 #if defined(GLQUAKE) || defined(VKQUAKE)
@@ -1593,6 +1597,7 @@ void M_Menu_Render_f (void)
 		MB_CHECKBOXCVAR("Disable Model Lerp", r_nolerp, 0),
 		MB_CHECKBOXCVAR("Disable Framegroup Lerp", r_noframegrouplerp, 0),
 		MB_CHECKBOXCVAR("Model Bobbing", cl_item_bobbing, 0),
+		MB_COMBOCVAR("Scene Cache", r_temporalscenecache, scenecacheopts,scenecachevalues, "Cache scene data to significantly optimise highly complex scenes or unvised maps.\nThis may result in offscreen surfaces getting rendered."),
 		MB_COMBOCVAR("Water Warp", r_waterwarp, warpopts, warpvalues, NULL),
 		MB_SLIDER("Water Alpha", r_wateralpha, 0, 1, 0.1, NULL),
 		MB_SLIDER("Viewmodel Alpha", r_drawviewmodel, 0, 1, 0.1, NULL),
@@ -1606,7 +1611,6 @@ void M_Menu_Render_f (void)
 		MB_CHECKBOXCVAR("Bloom", r_bloom, 0),
 #endif
 		MB_CHECKBOXCVARTIP("HDR", r_hdr_irisadaptation, 0, "Adjust scene brightness to compensate for lighting levels."),
-		MB_CHECKBOXCVARTIP("Temporal Scene Cache", r_temporalscenecache, 0, "Cache scene data to significantly optimise highly complex scenes or unvised maps.\n"CON_WARNING"Unfortunately this is incompatible with certain techniques, so may need to be disabled for compat with legacy content."),
 		MB_END()
 	};
 	menu = M_Options_Title(&y, 0);
diff --git a/engine/client/r_surf.c b/engine/client/r_surf.c
index 5495ca8c8..cf84d5923 100644
--- a/engine/client/r_surf.c
+++ b/engine/client/r_surf.c
@@ -3009,10 +3009,12 @@ struct webostate_s
 	struct webostate_s *next;
 	int lastvalid;	//keyed to cls.framecount, for cleaning up.
 	model_t *wmodel;
+	int framecount;
 	int cluster[2];
 	qboolean generating;
 	pvsbuffer_t pvs;
 	vboarray_t ebo;
+	vboarray_t vbo;
 	void *ebomem;
 	size_t idxcount;
 	int numbatches;
@@ -3026,6 +3028,7 @@ struct webostate_s
 
 	struct wesbatch_s
 	{
+		qboolean inefficient;	//this batch's shader needs special care with vertex data too
 		size_t numidx;
 		size_t maxidx;
 		size_t firstidx;	//offset into the final ebo
@@ -3034,6 +3037,8 @@ struct webostate_s
 		mesh_t m;
 		mesh_t *pm;
 		vbo_t vbo;
+
+		size_t maxverts;
 	} batches[1];
 };
 static struct webostate_s *webostates;
@@ -3046,11 +3051,27 @@ static void R_DestroyWorldEBO(struct webostate_s *es)
 		return;
 
 	for (i = 0; i < es->numbatches; i++)
+	{
+		if (es->batches[i].inefficient)
+		{
+			BZ_Free(es->batches[i].m.xyz_array);
+			BZ_Free(es->batches[i].m.st_array);
+			BZ_Free(es->batches[i].m.lmst_array[0]);
+			BZ_Free(es->batches[i].m.normals_array);
+			BZ_Free(es->batches[i].m.snormals_array);
+			BZ_Free(es->batches[i].m.tnormals_array);
+		}
 		BZ_Free(es->batches[i].idxbuffer);
+	}
 
 #ifdef GLQUAKE
 	if (qrenderer == QR_OPENGL)
-		qglDeleteBuffersARB(1, &es->ebo.gl.vbo);
+	{
+		if (es->ebo.gl.vbo)
+			qglDeleteBuffersARB(1, &es->ebo.gl.vbo);
+		if (es->vbo.gl.vbo)
+			qglDeleteBuffersARB(1, &es->vbo.gl.vbo);
+	}
 #endif
 #ifdef VKQUAKE
 	if (qrenderer == QR_VULKAN)
@@ -3060,7 +3081,7 @@ static void R_DestroyWorldEBO(struct webostate_s *es)
 }
 void R_GeneratedWorldEBO(void *ctx, void *data, size_t a_, size_t b_)
 {
-	size_t idxcount;
+	size_t idxcount, vertcount;
 	unsigned int i;
 	model_t *mod;
 	batch_t *b, *batch;
@@ -3076,13 +3097,61 @@ void R_GeneratedWorldEBO(void *ctx, void *data, size_t a_, size_t b_)
 
 	webostate->lastvalid = cls.framecount;
 
-	for (i = 0, idxcount = 0; i < webostate->numbatches; i++)
+	for (i = 0, idxcount = 0, vertcount = 0; i < webostate->numbatches; i++)
+	{
 		idxcount += webostate->batches[i].numidx;
+		vertcount += webostate->batches[i].m.numvertexes;
+	}
 #ifdef GLQUAKE
 	if (qrenderer == QR_OPENGL)
 	{
 		GL_DeselectVAO();
 
+		if (vertcount)
+		{
+			size_t vc;
+			vbo_t *vbo;
+			size_t v_coord	= 0;
+			size_t v_tc		= v_coord	+ sizeof(vecV_t)*vertcount;
+			size_t v_lmtc	= v_tc		+ sizeof(vec2_t)*vertcount;
+			size_t v_norm	= v_lmtc	+ sizeof(vec2_t)*vertcount;
+			size_t v_snorm	= v_norm	+ sizeof(vec3_t)*vertcount;
+			size_t v_tnorm	= v_snorm	+ sizeof(vec3_t)*vertcount;
+			size_t v_colour	= v_tnorm	+ sizeof(vec3_t)*vertcount;
+			size_t vbosize	= v_colour	+ sizeof(vec4_t)*vertcount;
+
+			if (!webostate->vbo.gl.vbo)
+				qglGenBuffersARB(1, &webostate->vbo.gl.vbo);
+			GL_SelectVBO(webostate->vbo.gl.vbo);
+			qglBufferDataARB(GL_ARRAY_BUFFER_ARB, vbosize, NULL, GL_STATIC_DRAW_ARB);
+			for (i = 0, vertcount = 0; i < webostate->numbatches; i++)
+			{
+				if (webostate->batches[i].inefficient)
+				{
+					vc = webostate->batches[i].m.numvertexes;
+
+					vbo = &webostate->batches[i].vbo;
+					vbo->coord.gl.vbo		= webostate->vbo.gl.vbo;	vbo->coord.gl.addr		= (char*)v_coord	+ sizeof(vecV_t)*vertcount;
+					vbo->texcoord.gl.vbo	= webostate->vbo.gl.vbo;	vbo->texcoord.gl.addr	= (char*)v_tc		+ sizeof(vec2_t)*vertcount;
+					vbo->lmcoord[0].gl.vbo	= webostate->vbo.gl.vbo;	vbo->lmcoord[0].gl.addr = (char*)v_lmtc		+ sizeof(vec2_t)*vertcount;
+					vbo->normals.gl.vbo		= webostate->vbo.gl.vbo;	vbo->normals.gl.addr	= (char*)v_norm		+ sizeof(vec3_t)*vertcount;
+					vbo->svector.gl.vbo		= webostate->vbo.gl.vbo;	vbo->svector.gl.addr	= (char*)v_snorm	+ sizeof(vec3_t)*vertcount;
+					vbo->tvector.gl.vbo		= webostate->vbo.gl.vbo;	vbo->tvector.gl.addr	= (char*)v_tnorm	+ sizeof(vec3_t)*vertcount;
+					vbo->colours[0].gl.vbo	= webostate->vbo.gl.vbo;	vbo->colours[0].gl.addr	= (char*)v_colour	+ sizeof(vec4_t)*vertcount;
+
+					qglBufferSubDataARB(GL_ARRAY_BUFFER_ARB,(qintptr_t)vbo->coord.gl.addr,		vc*sizeof(vecV_t), webostate->batches[i].m.xyz_array);
+					qglBufferSubDataARB(GL_ARRAY_BUFFER_ARB,(qintptr_t)vbo->texcoord.gl.addr,	vc*sizeof(vec2_t), webostate->batches[i].m.st_array);
+					qglBufferSubDataARB(GL_ARRAY_BUFFER_ARB,(qintptr_t)vbo->lmcoord[0].gl.addr,	vc*sizeof(vec2_t), webostate->batches[i].m.lmst_array[0]);
+					qglBufferSubDataARB(GL_ARRAY_BUFFER_ARB,(qintptr_t)vbo->normals.gl.addr,	vc*sizeof(vec3_t), webostate->batches[i].m.normals_array);
+					qglBufferSubDataARB(GL_ARRAY_BUFFER_ARB,(qintptr_t)vbo->svector.gl.addr,	vc*sizeof(vec3_t), webostate->batches[i].m.snormals_array);
+					qglBufferSubDataARB(GL_ARRAY_BUFFER_ARB,(qintptr_t)vbo->tvector.gl.addr,	vc*sizeof(vec3_t), webostate->batches[i].m.tnormals_array);
+					qglBufferSubDataARB(GL_ARRAY_BUFFER_ARB,(qintptr_t)vbo->colours[0].gl.addr,	vc*sizeof(vec4_t), webostate->batches[i].m.colors4f_array[0]);
+					webostate->batches[i].m.vbofirstvert = 0;
+					vertcount += vc;
+				}
+			}
+		}
+
 		webostate->ebo.gl.addr = NULL;
 		if (!webostate->ebo.gl.vbo)
 			qglGenBuffersARB(1, &webostate->ebo.gl.vbo);
@@ -3123,6 +3192,8 @@ void R_GeneratedWorldEBO(void *ctx, void *data, size_t a_, size_t b_)
 			webostate->ebomem = NULL;
 		}
 		free(indexes);
+
+		vertcount = 0; //unsupported for now.
 	}
 #endif
 
@@ -3142,28 +3213,33 @@ void R_GeneratedWorldEBO(void *ctx, void *data, size_t a_, size_t b_)
 			webostate->batches[i].pm = m;
 			b = &webostate->batches[i].b;
 			memcpy(b, batch, sizeof(*b));
-			memset(m, 0, sizeof(*m));
 
-			if (b->shader->flags & SHADER_NEEDSARRAYS)
-			{	//this ebo cache stuff tracks only indexes, we don't know the actual surfs any more.
-				//if NEEDSARRAYS is flagged then the cpu will need access to the mesh data - which it doesn't have.
-				//while we could figure out this info, there would be a lot of vertexes that are not referenced, which would be horrendously slow.
-				if (b->shader->flags & SHADER_SKY)
-					continue;
-				b->shader = R_RegisterShader_Vertex(mod, "unsupported");
-			}
-
-			m->numvertexes = webostate->batches[i].b.vbo->vertcount;
 			b->mesh = &webostate->batches[i].pm;
 			b->meshes = 1;
-			m->numindexes = webostate->batches[i].numidx;
-			m->vbofirstelement = webostate->batches[i].firstidx;
-			m->vbofirstvert = 0;
-			m->indexes = NULL;
 			b->vbo = &webostate->batches[i].vbo;
-			*b->vbo = *batch->vbo;
+			if (webostate->batches[i].inefficient)
+			{	//we had to generate new buffers because there's something evil in the shader..
+				m->indexes = webostate->batches[i].idxbuffer;
+				b->vbo->vao = 0;
+			}
+			else
+			{
+				*b->vbo = *batch->vbo;
+				if (b->shader->flags & SHADER_NEEDSARRAYS)
+				{	//this ebo cache stuff tracks only indexes, we don't know the actual surfs any more.
+					//if NEEDSARRAYS is flagged then the cpu will need access to the mesh data - which it doesn't have.
+					//while we could figure out this info, there would be a lot of vertexes that are not referenced, which would be horrendously slow.
+					if (b->shader->flags & SHADER_SKY)
+						continue;
+					b->shader = R_RegisterShader_Vertex(mod, "unsupported");
+				}
+				m->numvertexes = webostate->batches[i].b.vbo->vertcount;
+			}
 			b->vbo->indicies = webostate->ebo;
 			b->vbo->vao = 0;
+			m->numindexes = webostate->batches[i].numidx;
+			m->vbofirstelement = webostate->batches[i].firstidx;
+
 
 			b->next = webostate->rbatches[sortid];
 			webostate->rbatches[sortid] = b;
@@ -3201,11 +3277,42 @@ static void Surf_SimpleWorld_Q1BSP(struct webostate_s *es, qbyte *pvs)
 					if (eb->maxidx < eb->numidx + mesh->numindexes)
 					{
 						//FIXME: pre-allocate
-						eb->maxidx = eb->numidx + surf->mesh->numindexes + 512;
+						eb->maxidx = eb->numidx + mesh->numindexes + 512;
 						eb->idxbuffer = BZ_Realloc(eb->idxbuffer, eb->maxidx * sizeof(index_t));
 					}
-					for (i = 0; i < mesh->numindexes; i++)
-						eb->idxbuffer[eb->numidx+i] = mesh->indexes[i] + mesh->vbofirstvert;
+
+					if (eb->inefficient)
+					{	//slow path that needs to create new VBOs on the fly too.
+						if (eb->maxverts < eb->m.numvertexes + mesh->numvertexes)
+						{
+							//FIXME: pre-allocate
+							eb->maxverts = eb->m.numvertexes + mesh->numvertexes + 512;
+							eb->m.xyz_array			= BZ_Realloc(eb->m.xyz_array,			eb->maxverts * sizeof(*eb->m.xyz_array));
+							eb->m.st_array			= BZ_Realloc(eb->m.st_array,			eb->maxverts * sizeof(*eb->m.st_array));
+							eb->m.lmst_array[0]		= BZ_Realloc(eb->m.lmst_array[0],		eb->maxverts * sizeof(*eb->m.lmst_array[0]));
+							eb->m.normals_array		= BZ_Realloc(eb->m.normals_array,		eb->maxverts * sizeof(*eb->m.normals_array));
+							eb->m.snormals_array	= BZ_Realloc(eb->m.snormals_array,		eb->maxverts * sizeof(*eb->m.snormals_array));
+							eb->m.tnormals_array	= BZ_Realloc(eb->m.tnormals_array,		eb->maxverts * sizeof(*eb->m.tnormals_array));
+							eb->m.colors4f_array[0]	= BZ_Realloc(eb->m.colors4f_array[0],	eb->maxverts * sizeof(*eb->m.colors4f_array[0]));
+						}
+
+						memcpy(eb->m.xyz_array+eb->m.numvertexes,		mesh->xyz_array,		sizeof(*eb->m.xyz_array)*mesh->numvertexes);
+						memcpy(eb->m.st_array+eb->m.numvertexes,		mesh->st_array,			sizeof(*eb->m.st_array)*mesh->numvertexes);
+						memcpy(eb->m.lmst_array[0]+eb->m.numvertexes,	mesh->lmst_array[0],	sizeof(*eb->m.lmst_array[0])*mesh->numvertexes);
+						memcpy(eb->m.normals_array+eb->m.numvertexes,	mesh->normals_array,	sizeof(*eb->m.normals_array)*mesh->numvertexes);
+						memcpy(eb->m.snormals_array+eb->m.numvertexes,	mesh->snormals_array,	sizeof(*eb->m.snormals_array)*mesh->numvertexes);
+						memcpy(eb->m.tnormals_array+eb->m.numvertexes,	mesh->tnormals_array,	sizeof(*eb->m.tnormals_array)*mesh->numvertexes);
+						memcpy(eb->m.colors4f_array[0]+eb->m.numvertexes,mesh->colors4f_array[0],sizeof(*eb->m.colors4f_array[0])*mesh->numvertexes);
+
+						for (i = 0; i < mesh->numindexes; i++)
+							eb->idxbuffer[eb->numidx+i] = mesh->indexes[i] + eb->m.numvertexes;
+						eb->m.numvertexes+=mesh->numvertexes;
+					}
+					else
+					{
+						for (i = 0; i < mesh->numindexes; i++)
+							eb->idxbuffer[eb->numidx+i] = mesh->indexes[i] + mesh->vbofirstvert;
+					}
 					eb->numidx += mesh->numindexes;
 				}
 			}
@@ -3248,11 +3355,11 @@ static void Surf_SimpleWorld_Q3BSP(struct webostate_s *es, qbyte *pvs)
 	model_t *wmodel = es->wmodel;
 	int l = wmodel->numleafs;	//is this doing submodels too?
 	int c;
-	int fc = -r_framecount;
-	for (leaf = wmodel->leafs; l-- > 0; leaf++)
+	int fc = es->framecount;
+	for (leaf = wmodel->leafs; l --> 0; leaf++)
 	{
 		c = leaf->cluster;
-		if (c < 0)
+		if (c < 0 || !leaf->parent)
 			continue;	//o.O
 		if ((pvs[c>>3] & (1u<<(c&7))) && leaf->nummarksurfaces)
 		{
@@ -3272,11 +3379,40 @@ static void Surf_SimpleWorld_Q3BSP(struct webostate_s *es, qbyte *pvs)
 					if (eb->maxidx < eb->numidx + mesh->numindexes)
 					{
 						//FIXME: pre-allocate
-						eb->maxidx = eb->numidx + surf->mesh->numindexes + 512;
+						eb->maxidx = eb->numidx + mesh->numindexes + 512;
 						eb->idxbuffer = BZ_Realloc(eb->idxbuffer, eb->maxidx * sizeof(index_t));
 					}
-					for (i = 0; i < mesh->numindexes; i++)
-						eb->idxbuffer[eb->numidx+i] = mesh->indexes[i] + mesh->vbofirstvert;
+					if (eb->inefficient)
+					{	//slow path that needs to create a single ram-backed mesh
+						if (eb->maxverts < eb->m.numvertexes + mesh->numvertexes)
+						{
+							//FIXME: pre-allocate
+							eb->maxverts = eb->m.numvertexes + mesh->numvertexes + 512;
+							eb->m.xyz_array		= BZ_Realloc(eb->m.xyz_array,		eb->maxverts * sizeof(*eb->m.xyz_array));
+							eb->m.st_array		= BZ_Realloc(eb->m.st_array,		eb->maxverts * sizeof(*eb->m.st_array));
+							eb->m.lmst_array[0]	= BZ_Realloc(eb->m.lmst_array[0],	eb->maxverts * sizeof(*eb->m.lmst_array[0]));
+							eb->m.normals_array	= BZ_Realloc(eb->m.normals_array,	eb->maxverts * sizeof(*eb->m.normals_array));
+							eb->m.snormals_array= BZ_Realloc(eb->m.snormals_array,	eb->maxverts * sizeof(*eb->m.snormals_array));
+							eb->m.tnormals_array= BZ_Realloc(eb->m.tnormals_array,	eb->maxverts * sizeof(*eb->m.tnormals_array));
+							eb->m.colors4f_array[0]= BZ_Realloc(eb->m.colors4f_array[0],eb->maxverts * sizeof(*eb->m.colors4f_array[0]));
+						}
+						memcpy(eb->m.numvertexes+eb->m.xyz_array,		mesh->xyz_array,		sizeof(*eb->m.xyz_array)*mesh->numvertexes);
+						memcpy(eb->m.numvertexes+eb->m.st_array,		mesh->st_array,			sizeof(*eb->m.st_array)*mesh->numvertexes);
+						memcpy(eb->m.numvertexes+eb->m.lmst_array[0],	mesh->lmst_array[0],	sizeof(*eb->m.lmst_array[0])*mesh->numvertexes);
+						memcpy(eb->m.numvertexes+eb->m.normals_array,	mesh->normals_array,	sizeof(*eb->m.normals_array)*mesh->numvertexes);
+						memcpy(eb->m.numvertexes+eb->m.snormals_array,	mesh->snormals_array,	sizeof(*eb->m.snormals_array)*mesh->numvertexes);
+						memcpy(eb->m.numvertexes+eb->m.tnormals_array,	mesh->tnormals_array,	sizeof(*eb->m.tnormals_array)*mesh->numvertexes);
+						memcpy(eb->m.numvertexes+eb->m.colors4f_array[0],mesh->colors4f_array[0],sizeof(*eb->m.colors4f_array[0])*mesh->numvertexes);
+
+						for (i = 0; i < mesh->numindexes; i++)
+							eb->idxbuffer[eb->numidx+i] = mesh->indexes[i] + eb->m.numvertexes;
+						eb->m.numvertexes+=mesh->numvertexes;
+					}
+					else
+					{	//using the general prebaked entire-batch vbos
+						for (i = 0; i < mesh->numindexes; i++)
+							eb->idxbuffer[eb->numidx+i] = mesh->indexes[i] + mesh->vbofirstvert;
+					}
 					eb->numidx += mesh->numindexes;
 				}
 			}
@@ -3292,6 +3428,9 @@ void R_GenWorldEBO(void *ctx, void *data, size_t a, size_t b)
 
 	if (!es->numbatches)
 	{
+		int sortid;
+		batch_t *batch;
+
 		es->numbatches = es->wmodel->numbatches;
 
 		for (i = 0; i < es->numbatches; i++)
@@ -3300,7 +3439,25 @@ void R_GenWorldEBO(void *ctx, void *data, size_t a, size_t b)
 			es->batches[i].numidx = 0;
 			es->batches[i].maxidx = 0;
 			es->batches[i].idxbuffer = NULL;
+			es->batches[i].inefficient = false;
+
+			es->batches[i].maxverts = 0;
+			memset(&es->batches[i].m, 0, sizeof(es->batches[i].m));
+			memset(&es->batches[i].vbo, 0, sizeof(es->batches[i].vbo));
 		}
+
+		//set to 2 to reveal the inefficient surfaces...
+		if (r_temporalscenecache.ival < 2)
+			for (sortid = 0; sortid < SHADER_SORT_COUNT; sortid++)
+				for (batch = currentmodel->batches[sortid]; batch != NULL; batch = batch->next)
+				{
+#if MAXRLIGHTMAPS > 1
+					if (batch->lmlightstyle[1] != INVALID_LIGHTSTYLE || batch->vtlightstyle[1] != INVALID_VLIGHTSTYLE)
+						continue;	//not supported here, show fallback shader instead (would work but with screwed lighting, we prefer a better-defined result).
+#endif
+					if (!batch->shader || batch->shader->flags & SHADER_NEEDSARRAYS)
+						es->batches[batch->user.bmodel.ebobatch].inefficient = true;
+				}
 	}
 	else
 	{
@@ -3308,6 +3465,7 @@ void R_GenWorldEBO(void *ctx, void *data, size_t a, size_t b)
 		{
 			es->batches[i].firstidx = 0;
 			es->batches[i].numidx = 0;
+			es->batches[i].m.numvertexes = 0;
 		}
 	}
 
@@ -3381,6 +3539,15 @@ void Surf_DrawWorld (void)
 		Surf_LightmapShift(currentmodel);
 
 #ifdef THREADEDWORLD
+#warning Enable auto threaded world when ready
+/*
+		if (!*r_temporalscenecache.string && cl.worldmodel && cl.worldmodel->loadstate == MLS_LOADED && (cl.worldmodel->fromgame == fg_quake || cl.worldmodel->fromgame == fg_halflife))
+		{	//when empty, pick a suitable default.
+			//at what point is it a win? should we consider batch counts? probability of offscreen-only surfaces?
+			if (cl.worldmodel->fromgame == fg_quake || cl.worldmodel->fromgame == fg_halflife)
+				r_temporalscenecache.ival = cl.worldmodel->numleafs > 6000 && r_waterstyle.ival<=1 && r_telestyle.ival<=1 && r_slimestyle.ival<=1 && r_lavastyle.ival<=1;
+		}
+*/
 		if ((r_temporalscenecache.ival /*|| currentmodel->numbatches*/) && !r_refdef.recurse && currentmodel->type == mod_brush)
 		{
 			struct webostate_s *webostate, *best = NULL, *kill, **link;
@@ -3420,14 +3587,14 @@ void Surf_DrawWorld (void)
 			if (qrenderer != QR_OPENGL && qrenderer != QR_VULKAN)
 				;
 #ifdef Q1BSPS
-			else if (currentmodel->fromgame == fg_quake || currentmodel->fromgame == fg_halflife)
+			else if (currentmodel->fromgame == fg_quake || currentmodel->fromgame == fg_halflife || currentmodel->fromgame == fg_quake3)
 			{
 				if (!webogenerating)
 				{
 					qboolean gennew = false;
 					if (!webostate)
 						gennew = true;	//generate an initial one, if we can.
-					if (!gennew && webostate)
+					if (!gennew && webostate && currentmodel->fromgame != fg_quake3)
 					{
 						int i = cl_max_lightstyles;
 						for (i = 0; i < cl_max_lightstyles; i++)
@@ -3440,7 +3607,7 @@ void Surf_DrawWorld (void)
 						}
 					}
 
-					if (!gennew && webostate && (webostate->cluster[0] != r_viewcluster || webostate->cluster[1] != r_viewcluster2))
+					if (!gennew && webostate)// && (webostate->cluster[0] != r_viewcluster || webostate->cluster[1] != r_viewcluster2))
 					{
 						if (webostate->pvs.buffersize != currentmodel->pvsbytes || r_viewcluster2 != -1)
 							gennew = true;	//o.O
@@ -3491,11 +3658,13 @@ void Surf_DrawWorld (void)
 						if (!webogenerating)
 						{
 							webogenerating = BZ_Malloc(sizeof(*webogenerating) + sizeof(webogenerating->batches[0]) * (currentmodel->numbatches-1) + currentmodel->pvsbytes);
+							memset(&webogenerating->vbo, 0, sizeof(webogenerating->vbo));
 							memset(&webogenerating->ebo, 0, sizeof(webogenerating->ebo));
 							webogenerating->ebomem = NULL;
 							webogenerating->numbatches = 0;
 						}
 						webogenerating->wmodel = currentmodel;
+						webogenerating->framecount = -r_framecount;
 						webogenerating->cluster[0] = r_viewcluster;
 						webogenerating->cluster[1] = r_viewcluster2;
 						webogenerating->pvs.buffer = (qbyte*)(webogenerating+1) + sizeof(webogenerating->batches[0])*(currentmodel->numbatches-1);
@@ -3508,80 +3677,6 @@ void Surf_DrawWorld (void)
 				}
 			}
 #endif
-#ifdef Q3BSPS
-			else if (currentmodel->fromgame == fg_quake3)
-			{
-				if (!webogenerating)
-				{
-					qboolean gennew = false;
-					if (!webostate)
-						gennew = true;	//generate an initial one, if we can.
-
-					if (!gennew && webostate && (webostate->cluster[0] != r_viewcluster || webostate->cluster[1] != r_viewcluster2))
-					{
-						if (webostate->pvs.buffersize != currentmodel->pvsbytes || r_viewcluster2 != -1)
-							gennew = true;	//o.O
-						else if (memcmp(webostate->pvs.buffer, webostate->wmodel->funcs.ClusterPVS(webostate->wmodel, r_viewcluster, NULL, PVM_FAST), currentmodel->pvsbytes))
-							gennew = true;
-						else
-						{	//okay, so the pvs didn't change despite the clusters changing. this happens when using unvised maps or lots of func_detail
-							//just hack the cluster numbers so we don't have to do the memcmp above repeatedly for no reason.
-							webostate->cluster[0] = r_viewcluster;
-							webostate->cluster[1] = r_viewcluster2;
-						}
-					}
-
-					if (gennew)
-					{
-						if (!currentmodel->numbatches)
-						{
-							int sortid;
-							batch_t *batch;
-							currentmodel->numbatches = 0;
-							for (sortid = 0; sortid < SHADER_SORT_COUNT; sortid++)
-								for (batch = currentmodel->batches[sortid]; batch != NULL; batch = batch->next)
-								{
-									batch->user.bmodel.ebobatch = currentmodel->numbatches;
-									currentmodel->numbatches++;
-								}
-							/*TODO submodels too*/
-						}
-
-						webogeneratingstate = true;
-
-						webogenerating = NULL;
-						if (webostate)
-							webostate->lastvalid = cls.framecount;
-						for (link = &webostates; (kill=*link); )
-						{
-							if (kill->lastvalid < cls.framecount-5 && kill->wmodel == currentmodel)
-							{	//this one looks old... kill it.
-								if (webogenerating)
-									R_DestroyWorldEBO(webogenerating);	//can't use more than one!
-								webogenerating = kill;
-								*link = kill->next;
-							}
-							else
-								link = &(*link)->next;
-						}
-						if (!webogenerating)
-						{
-							webogenerating = BZ_Malloc(sizeof(*webogenerating) + sizeof(webogenerating->batches[0]) * (currentmodel->numbatches-1) + currentmodel->pvsbytes);
-							memset(&webogenerating->ebo, 0, sizeof(webogenerating->ebo));
-							webogenerating->ebomem = NULL;
-							webogenerating->numbatches = 0;
-						}
-						webogenerating->wmodel = currentmodel;
-						webogenerating->cluster[0] = r_viewcluster;
-						webogenerating->cluster[1] = r_viewcluster2;
-						webogenerating->pvs.buffer = (qbyte*)(webogenerating+1) + sizeof(webogenerating->batches[0])*(currentmodel->numbatches-1);
-						webogenerating->pvs.buffersize = currentmodel->pvsbytes;
-						Q_strncpyz(webogenerating->dbgid, "webostate", sizeof(webogenerating->dbgid));
-						COM_AddWork(WG_LOADER, R_GenWorldEBO, webogenerating, NULL, 0, 0);
-					}
-				}
-			}
-#endif
 
 			if (webostate)
 			{
@@ -4495,6 +4590,10 @@ void Surf_NewMap (void)
 	}
 	Shader_DoReload();
 
+#ifdef THREADEDWORLD
+	Cvar_ForceCallback(&r_temporalscenecache);
+#endif
+
 	if (!pe)
 		Cvar_ForceCallback(&r_particlesystem);
 	R_Clutter_Purge();
diff --git a/engine/client/renderer.c b/engine/client/renderer.c
index 851c36f47..96f182b22 100644
--- a/engine/client/renderer.c
+++ b/engine/client/renderer.c
@@ -174,7 +174,7 @@ cvar_t r_drawviewmodel						= CVARF  ("r_drawviewmodel", "1", CVAR_ARCHIVE);
 cvar_t r_drawviewmodelinvis					= CVAR  ("r_drawviewmodelinvis", "0");
 cvar_t r_dynamic							= CVARFD ("r_dynamic", IFMINIMAL("0","1"),
 													  CVAR_ARCHIVE, "0: no standard dlights at all.\n1: coloured dlights will be used, they may show through walls. These are not realtime things.\n2: The dlights will be forced to monochrome (this does not affect coronas/flashblends/rtlights attached to the same light).");
-cvar_t r_temporalscenecache					= CVARFD ("r_temporalscenecache", "", CVAR_ARCHIVE, "Controls whether to generate+reuse a scene cache over multiple frames. This is generated on a separate thread to avoid any associated costs. This can significantly boost framerates on complex maps, but can also stress the gpu more (performance tradeoff that varies per map). An outdated cache may be used if the cache takes too long to build (eg: lightmap animations), which could cause the odd glitch when moving fast (but retain more consistent framerates - another tradeoff).\n0: Tranditional quake rendering.\n1: Generate+Use the scene cache.");
+cvar_t r_temporalscenecache					= CVARAFD ("r_temporalscenecache", "", "r_scenecache", CVAR_ARCHIVE, "Controls whether to generate+reuse a scene cache over multiple frames. This is generated on a separate thread to avoid any associated costs. This can significantly boost framerates on complex maps, but can also stress the gpu more (performance tradeoff that varies per map). An outdated cache may be used if the cache takes too long to build (eg: lightmap animations), which could cause the odd glitch when moving fast (but retain more consistent framerates - another tradeoff).\n0: Tranditional quake rendering.\n1: Generate+Use the scene cache.");
 cvar_t r_fastturb							= CVARF ("r_fastturb", "0",
 													CVAR_SHADERSYSTEM);
 cvar_t r_skycloudalpha						= CVARFD ("r_skycloudalpha", "1", CVAR_RENDERERLATCH, "Controls how opaque the front layer of legacy scrolling skies should be.");
diff --git a/engine/gl/gl_backend.c b/engine/gl/gl_backend.c
index 4af1bd568..475d6b589 100644
--- a/engine/gl/gl_backend.c
+++ b/engine/gl/gl_backend.c
@@ -2683,7 +2683,7 @@ static void alphagen(const shaderpass_t *pass, int cnt, avec4_t *const src, avec
 
 	case ALPHA_GEN_PORTAL:
 		//FIXME: should this be per-vert?
-		if (r_refdef.recurse)
+		if (r_refdef.recurse || !mesh->xyz_array)
 			f = 1;
 		else
 		{
diff --git a/engine/gl/gl_model.c b/engine/gl/gl_model.c
index ab0ff6736..e90db9f62 100644
--- a/engine/gl/gl_model.c
+++ b/engine/gl/gl_model.c
@@ -2675,6 +2675,8 @@ batch->firstmesh is set only in and for this function, its cleared out elsewhere
 */
 static int Mod_Batches_Generate(model_t *mod)
 {
+//#define NOBATCH	//define this to force each surface into its own batch...
+
 	int i;
 	msurface_t *surf;
 	shader_t *shader;
@@ -2752,17 +2754,21 @@ static int Mod_Batches_Generate(model_t *mod)
 			plane[3] = 0;
 		}
 
+#ifdef NOBATCH
+		batch = NULL;
+		(void)lbatch;
+#else
 		if (lbatch && (
 					lbatch->texture == surf->texinfo->texture &&
 					lbatch->shader == shader &&
 					lbatch->lightmap[0] == lmmerge(surf->lightmaptexturenums[0]) &&
 					Vector4Compare(plane, lbatch->user.bmodel.plane) &&
 					lbatch->firstmesh + surf->mesh->numvertexes <= MAX_INDICIES &&
-#if MAXRLIGHTMAPS > 1
+	#if MAXRLIGHTMAPS > 1
 					lbatch->lightmap[1] == lmmerge(surf->lightmaptexturenums[1]) &&
 					lbatch->lightmap[2] == lmmerge(surf->lightmaptexturenums[2]) &&
 					lbatch->lightmap[3] == lmmerge(surf->lightmaptexturenums[3]) &&
-#endif
+	#endif
 					lbatch->fog == surf->fog &&
 					lbatch->envmap == envmap))
 			batch = lbatch;
@@ -2776,16 +2782,17 @@ static int Mod_Batches_Generate(model_t *mod)
 							batch->lightmap[0] == lmmerge(surf->lightmaptexturenums[0]) &&
 							Vector4Compare(plane, batch->user.bmodel.plane) &&
 							batch->firstmesh + surf->mesh->numvertexes <= MAX_INDICIES &&
-#if MAXRLIGHTMAPS > 1
+	#if MAXRLIGHTMAPS > 1
 							batch->lightmap[1] == lmmerge(surf->lightmaptexturenums[1]) &&
 							batch->lightmap[2] == lmmerge(surf->lightmaptexturenums[2]) &&
 							batch->lightmap[3] == lmmerge(surf->lightmaptexturenums[3]) &&
-#endif
+	#endif
 							batch->fog == surf->fog &&
 							batch->envmap == envmap)
 					break;
 			}
 		}
+#endif
 		if (!batch)
 		{
 			batch = ZG_Malloc(&mod->memgroup, sizeof(*batch));
@@ -2824,6 +2831,7 @@ static int Mod_Batches_Generate(model_t *mod)
 
 			mod->batches[sortid] = batch;
 		}
+		batch->user.bmodel.ebobatch = -1;
 
 		surf->sbatch = batch;	//let the surface know which batch its in
 		batch->maxmeshes++;
diff --git a/engine/gl/gl_model.h b/engine/gl/gl_model.h
index 02ea44c62..0b293eee4 100644
--- a/engine/gl/gl_model.h
+++ b/engine/gl/gl_model.h
@@ -159,8 +159,8 @@ typedef struct batch_s
 	{
 		struct
 		{
-			unsigned int shadowbatch;	//a unique index to accelerate shadowmesh generation (dlights, yay!)
 			unsigned int ebobatch;		//temporal scene cache stuff, basically just a simple index so we don't have to deal with shader sort values when generating new index lists.
+			unsigned int shadowbatch;	//a unique index to accelerate shadowmesh generation (dlights, yay!)
 //		} bmodel;
 //		struct
 //		{
@@ -186,6 +186,7 @@ typedef struct batch_s
 		} surf;*/
 		struct
 		{
+			unsigned int ebobatch;		//temporal scene cache stuff, basically just a simple index so we don't have to deal with shader sort values when generating new index lists.
 			mesh_t meshbuf;
 			mesh_t *meshptr;
 		};
diff --git a/engine/gl/gl_rmain.c b/engine/gl/gl_rmain.c
index 3f1d5e5f3..ea963dffb 100644
--- a/engine/gl/gl_rmain.c
+++ b/engine/gl/gl_rmain.c
@@ -1121,40 +1121,42 @@ void GLR_DrawPortal(batch_t *batch, batch_t **blist, batch_t *depthmasklist[2],
 	pvsbuffer_t newvis;
 	float ivmat[16], trmat[16];
 
-	if (!mesh->xyz_array)
+	if (mesh->xyz_array)
+	{
+		if (!mesh->normals_array)
+		{
+			VectorSet(plane.normal, 0, 0, 1);
+		}
+		else
+		{
+			VectorCopy(mesh->normals_array[0], plane.normal);
+		}
+
+		if (batch->ent == &r_worldentity)
+		{
+			plane.dist = DotProduct(mesh->xyz_array[0], plane.normal);
+		}
+		else
+		{
+			vec3_t point;
+			VectorCopy(plane.normal, oplane.normal);
+			//rotate the surface normal around its entity's matrix
+			plane.normal[0] = oplane.normal[0]*batch->ent->axis[0][0] + oplane.normal[1]*batch->ent->axis[1][0] + oplane.normal[2]*batch->ent->axis[2][0];
+			plane.normal[1] = oplane.normal[0]*batch->ent->axis[0][1] + oplane.normal[1]*batch->ent->axis[1][1] + oplane.normal[2]*batch->ent->axis[2][1];
+			plane.normal[2] = oplane.normal[0]*batch->ent->axis[0][2] + oplane.normal[1]*batch->ent->axis[1][2] + oplane.normal[2]*batch->ent->axis[2][2];
+
+			//rotate some point on the mesh around its entity's matrix
+			point[0] = mesh->xyz_array[0][0]*batch->ent->axis[0][0] + mesh->xyz_array[0][1]*batch->ent->axis[1][0] + mesh->xyz_array[0][2]*batch->ent->axis[2][0] + batch->ent->origin[0];
+			point[1] = mesh->xyz_array[0][0]*batch->ent->axis[0][1] + mesh->xyz_array[0][1]*batch->ent->axis[1][1] + mesh->xyz_array[0][2]*batch->ent->axis[2][1] + batch->ent->origin[1];
+			point[2] = mesh->xyz_array[0][0]*batch->ent->axis[0][2] + mesh->xyz_array[0][1]*batch->ent->axis[1][2] + mesh->xyz_array[0][2]*batch->ent->axis[2][2] + batch->ent->origin[2];
+
+			//now we can figure out the plane dist
+			plane.dist = DotProduct(point, plane.normal);
+		}
+	}
+	else
 		return;
 
-	if (!mesh->normals_array)
-	{
-		VectorSet(plane.normal, 0, 0, 1);
-	}
-	else
-	{
-		VectorCopy(mesh->normals_array[0], plane.normal);
-	}
-
-	if (batch->ent == &r_worldentity)
-	{
-		plane.dist = DotProduct(mesh->xyz_array[0], plane.normal);
-	}
-	else
-	{
-		vec3_t point;
-		VectorCopy(plane.normal, oplane.normal);
-		//rotate the surface normal around its entity's matrix
-		plane.normal[0] = oplane.normal[0]*batch->ent->axis[0][0] + oplane.normal[1]*batch->ent->axis[1][0] + oplane.normal[2]*batch->ent->axis[2][0];
-		plane.normal[1] = oplane.normal[0]*batch->ent->axis[0][1] + oplane.normal[1]*batch->ent->axis[1][1] + oplane.normal[2]*batch->ent->axis[2][1];
-		plane.normal[2] = oplane.normal[0]*batch->ent->axis[0][2] + oplane.normal[1]*batch->ent->axis[1][2] + oplane.normal[2]*batch->ent->axis[2][2];
-
-		//rotate some point on the mesh around its entity's matrix
-		point[0] = mesh->xyz_array[0][0]*batch->ent->axis[0][0] + mesh->xyz_array[0][1]*batch->ent->axis[1][0] + mesh->xyz_array[0][2]*batch->ent->axis[2][0] + batch->ent->origin[0];
-		point[1] = mesh->xyz_array[0][0]*batch->ent->axis[0][1] + mesh->xyz_array[0][1]*batch->ent->axis[1][1] + mesh->xyz_array[0][2]*batch->ent->axis[2][1] + batch->ent->origin[1];
-		point[2] = mesh->xyz_array[0][0]*batch->ent->axis[0][2] + mesh->xyz_array[0][1]*batch->ent->axis[1][2] + mesh->xyz_array[0][2]*batch->ent->axis[2][2] + batch->ent->origin[2];
-
-		//now we can figure out the plane dist
-		plane.dist = DotProduct(point, plane.normal);
-	}
-
 	//if we're too far away from the surface, don't draw anything
 	if (batch->shader->flags & SHADER_AGEN_PORTAL)
 	{
@@ -1211,12 +1213,15 @@ void GLR_DrawPortal(batch_t *batch, batch_t **blist, batch_t *depthmasklist[2],
 			int clust, i, j;
 			float d;
 			vec3_t point;
-			r_refdef.forcevis = true;
+			r_refdef.forcevis = false;
 			r_refdef.forcedvis = NULL;
 			newvis.buffer = alloca(newvis.buffersize=cl.worldmodel->pvsbytes);
 			for (i = batch->firstmesh; i < batch->meshes; i++)
 			{
 				mesh = batch->mesh[i];
+				if (!mesh->xyz_array)
+					continue;
+				r_refdef.forcevis = true;
 				VectorClear(point);
 				for (j = 0; j < mesh->numvertexes; j++)
 					VectorAdd(point, mesh->xyz_array[j], point);
diff --git a/engine/gl/gl_shader.c b/engine/gl/gl_shader.c
index a48480f8e..03b315ff9 100644
--- a/engine/gl/gl_shader.c
+++ b/engine/gl/gl_shader.c
@@ -5760,7 +5760,9 @@ done:;
 #endif
 		s->passes->numMergedPasses = s->numpasses;
 	}
-	else if (s->numdeforms)
+	else if(s->numdeforms ||
+			s->sort == SHADER_SORT_PORTAL ||	//q3-style portals (needed for pvs info)
+			s->flags & (SHADER_HASREFLECT|SHADER_HASREFRACT)) //water effects (needed for pvs info)
 		s->flags |= SHADER_NEEDSARRAYS;
 	else
 	{
@@ -5774,10 +5776,17 @@ done:;
 				s->flags |= SHADER_NEEDSARRAYS;
 				break;
 			}
+			if (pass->alphagen == ALPHA_GEN_PORTAL ||	//needs xyz
+				pass->alphagen == ALPHA_GEN_SPECULAR)	//needs xyz+norm
+			{
+				s->flags |= SHADER_NEEDSARRAYS;
+				break;
+			}
 			if (!(pass->flags & SHADER_PASS_NOCOLORARRAY))
 			{
 				if (!(((pass->rgbgen == RGB_GEN_VERTEX_LIGHTING) ||
 					(pass->rgbgen == RGB_GEN_VERTEX_EXACT) ||
+					(pass->alphagen == ALPHA_GEN_VERTEX) ||
 					(pass->rgbgen == RGB_GEN_ONE_MINUS_VERTEX)) &&
 					(pass->alphagen == ALPHA_GEN_VERTEX)))
 				{