From 72a1fef7147623a179b2023e35ec98433ef13c16 Mon Sep 17 00:00:00 2001 From: Bill Currie Date: Thu, 29 Jul 2021 11:49:18 +0900 Subject: [PATCH] [qfvis] Use hunk to manage winding memory It turns out cmem is not so good for many large allocations (probably a bug in handling the blocks), but was really meant for lots of little churning allocations anyway. After an analysis of winding lifetimes, it became clear that the hunk allocator would work very well. The base windings are allocated from a global hunk (currently 1GB, plenty for even ad_tears), and ephemeral windings are allocated from a per-thread hunk of 1MB (seems to be way more than enough: gmsp3v2 uses a maximum of only 56064 bytes, and ad_tears got through 30% before I gave up on it). Any speed difference (for gmsp3v2) seems to be lost in the noise: still completing in 38.4s on my machine. --- tools/qfvis/include/vis.h | 9 ++++--- tools/qfvis/source/flow.c | 17 ++++++------- tools/qfvis/source/qfvis.c | 49 ++++++++++++-------------------------- 3 files changed, 26 insertions(+), 49 deletions(-) diff --git a/tools/qfvis/include/vis.h b/tools/qfvis/include/vis.h index 54b9de0d1..a96ce18ac 100644 --- a/tools/qfvis/include/vis.h +++ b/tools/qfvis/include/vis.h @@ -76,6 +76,7 @@ extern pthread_rwlock_t *stats_lock; #include "QF/cmem.h" #include "QF/dstring.h" #include "QF/set.h" +#include "QF/zone.h" #include "QF/simd/vec4f.h" #define MAX_PORTALS 32768 @@ -92,7 +93,7 @@ typedef struct winding_s { unsigned numpoints; int id; int thread; - vec4f_t points[MAX_PORTALS_ON_CLUSTER]; // variable sized + vec4f_t points[1]; // variable sized } winding_t; typedef enum { @@ -161,9 +162,7 @@ typedef struct { unsigned sep_free; ///< how many separators were freed unsigned sep_highwater; ///< most separators in flight unsigned sep_maxbulk; ///< most separators freed at once - unsigned winding_alloc; ///< how many windings were allocated - unsigned winding_free; ///< how many windings were freed - unsigned winding_highwater; ///< most windings in flight + size_t winding_mark; ///< most memory allocated to windings unsigned stack_alloc; ///< how many stack blocks were allocated unsigned stack_free; ///< how many stack blocks were freed } visstat_t; @@ -176,6 +175,7 @@ typedef struct threaddata_s { sep_t *sep_freelist; ///< per-thread list of free separators winding_t *winding_freelist; ///< per-thread list of free windings memsuper_t *memsuper; ///< per-thread memory pool + memhunk_t *hunk; dstring_t *str; set_pool_t set_pool; int id; @@ -203,7 +203,6 @@ extern cluster_t *clusters; extern int *leafcluster; extern byte *uncompressed; -void FreeWinding (threaddata_t *thread, winding_t *w); winding_t *NewWinding (threaddata_t *thread, int points); winding_t *ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split, qboolean keepon); diff --git a/tools/qfvis/source/flow.c b/tools/qfvis/source/flow.c index 14e039ead..e754cbbdb 100644 --- a/tools/qfvis/source/flow.c +++ b/tools/qfvis/source/flow.c @@ -345,13 +345,18 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack) pass_winding = prevstack->pass_winding; pass_plane = prevstack->pass_plane; + size_t winding_mark = Hunk_LowMark (thread->hunk); + // check all portals for flowing into other clusters for (i = 0; i < cluster->numportals; i++) { target_portal = cluster->portals[i]; - if (!set_is_member (prevstack->mightsee, target_portal->cluster)) continue; // can't possibly see it + thread->stats.winding_mark = max (thread->stats.winding_mark, + Hunk_LowMark (thread->hunk)); + Hunk_RawFreeToLowMark (thread->hunk, winding_mark); + // if target_portal can't see anything we haven't already seen, skip it test = select_test_set (target_portal, thread); if (!mightsee_more (might, prevstack->mightsee, test, vis)) { @@ -385,7 +390,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack) stack->pass_portal = target_portal; RecursiveClusterFlow (target_portal->cluster, thread, stack); - FreeWinding (thread, target_winding); continue; } @@ -399,7 +403,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack) source_winding = ClipWinding (thread, source_winding, backplane, false); if (!source_winding) { - FreeWinding (thread, target_winding); continue; } @@ -418,7 +421,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack) target_winding); if (!target_winding) { thread->stats.targetclipped++; - FreeWinding (thread, source_winding); continue; } if (target_winding != old) @@ -437,7 +439,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack) target_winding); if (!target_winding) { thread->stats.targetclipped++; - FreeWinding (thread, source_winding); continue; } if (target_winding != old) @@ -455,7 +456,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack) free_separators (thread, sep); if (!source_winding) { thread->stats.sourceclipped++; - FreeWinding (thread, target_winding); continue; } if (source_winding != old) @@ -471,7 +471,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack) free_separators (thread, sep); if (!source_winding) { thread->stats.sourceclipped++; - FreeWinding (thread, target_winding); continue; } if (source_winding != old) @@ -488,10 +487,8 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack) // flow through it for real RecursiveClusterFlow (target_portal->cluster, thread, stack); - - FreeWinding (thread, source_winding); - FreeWinding (thread, target_winding); } + Hunk_RawFreeToLowMark (thread->hunk, winding_mark); free_separators (thread, stack->separators[1]); free_separators (thread, stack->separators[0]); } diff --git a/tools/qfvis/source/qfvis.c b/tools/qfvis/source/qfvis.c index 94ba1507f..e37ebe5fd 100644 --- a/tools/qfvis/source/qfvis.c +++ b/tools/qfvis/source/qfvis.c @@ -156,32 +156,15 @@ NewWinding (threaddata_t *thread, int points) winding_t *winding; unsigned size; - if (points > MAX_POINTS_ON_WINDING) - Sys_Error ("NewWinding: %i points", points); - size = field_offset (winding_t, points[points]); - winding = CMEMALLOC (13, winding_t, thread->winding, thread->memsuper); + winding = Hunk_RawAlloc (thread->hunk, size); memset (winding, 0, size); - thread->stats.winding_alloc++; winding->id = thread->winding_id++; winding->thread = thread->id; return winding; } -void -FreeWinding (threaddata_t *thread, winding_t *w) -{ - if (!w->original) { - unsigned count = thread->stats.winding_alloc - thread->stats.winding_free; - if (count > thread->stats.winding_highwater) { - thread->stats.winding_highwater = count; - } - thread->stats.winding_free++; - CMEMFREE (thread->winding, w); - } -} - winding_t * CopyWinding (threaddata_t *thread, const winding_t *w) { @@ -189,10 +172,9 @@ CopyWinding (threaddata_t *thread, const winding_t *w) winding_t *copy; size = field_offset (winding_t, points[w->numpoints]); - copy = CMEMALLOC (13, winding_t, thread->winding, thread->memsuper); + copy = Hunk_RawAlloc (thread->hunk, size); memcpy (copy, w, size); copy->original = false; - thread->stats.winding_alloc++; copy->id = thread->winding_id++; copy->thread = thread->id; return copy; @@ -362,7 +344,6 @@ ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split, return in; } if (!counts[SIDE_FRONT]) { - FreeWinding (thread, in); return NULL; } if (!counts[SIDE_BACK]) { @@ -397,8 +378,6 @@ ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split, Sys_Error ("ClipWinding: points exceeded estimate: n:%u m:%u", neww->numpoints, maxpts); } - // free the original winding - FreeWinding (thread, in); return neww; } @@ -461,8 +440,8 @@ UpdateStats (threaddata_t *thread) global_stats.mightseeupdate += thread->stats.mightseeupdate; global_stats.sep_alloc += thread->stats.sep_alloc; global_stats.sep_free += thread->stats.sep_free; - global_stats.winding_alloc += thread->stats.winding_alloc; - global_stats.winding_free += thread->stats.winding_free; + global_stats.winding_mark = max (global_stats.winding_mark, + thread->stats.winding_mark); global_stats.stack_alloc += thread->stats.stack_alloc; global_stats.stack_free += thread->stats.stack_free; UNLOCK (stats_lock); @@ -569,8 +548,10 @@ LeafThread (void *_thread) int thread = (int) (intptr_t) _thread; threaddata_t data; int count = 0; + size_t thread_memsize = 1024 * 1024; memset (&data, 0, sizeof (data)); + data.hunk = Hunk_Init (Sys_Alloc (thread_memsize), thread_memsize); set_pool_init (&data.set_pool); data.id = thread; data.memsuper = new_memsuper (); @@ -588,13 +569,13 @@ LeafThread (void *_thread) PortalFlow (&data, portal); - int whw = data.stats.winding_highwater; + int whm = data.stats.winding_mark; int shw = data.stats.sep_highwater; int smb = data.stats.sep_maxbulk; PortalCompleted (&data, portal); data.stats.sep_highwater = shw; data.stats.sep_maxbulk = smb; - data.stats.winding_highwater = whw; + data.stats.winding_mark = whm; if (options.verbosity >= 4) printf ("portal:%5i mightsee:%5i cansee:%5i %5u/%u\n", @@ -605,8 +586,8 @@ LeafThread (void *_thread) } while (1); if (options.verbosity >= 2) { - printf ("thread %d winding highwater: %d\n", thread, - data.stats.winding_highwater); + printf ("thread %d winding mark: %zd\n", thread, + data.stats.winding_mark); printf ("thread %d separator highwater: %d\n", thread, data.stats.sep_highwater); printf ("thread %d separator maxbulk: %d\n", thread, @@ -619,6 +600,8 @@ LeafThread (void *_thread) working[thread] = -1; delete_memsuper (data.memsuper); dstring_delete (data.str); + + Sys_Free (data.hunk, thread_memsize); return NULL; } @@ -965,9 +948,7 @@ CalcPortalVis (void) printf ("separators allocated: %u freed: %u %u\n", global_stats.sep_alloc, global_stats.sep_free, global_stats.sep_alloc - global_stats.sep_free); - printf ("windings allocated: %u freed: %u %u\n", - global_stats.winding_alloc, global_stats.winding_free, - global_stats.winding_alloc - global_stats.winding_free); + printf ("max windings mark: %zd\n", global_stats.winding_mark); printf ("stack blocks allocated: %u freed: %u %u\n", global_stats.stack_alloc, global_stats.stack_free, global_stats.stack_alloc - global_stats.stack_free); @@ -1283,8 +1264,6 @@ LoadPortals (char *name) line = err; } - if (numpoints > MAX_POINTS_ON_WINDING) - Sys_Error ("LoadPortals: portal %u has too many points", i); if ((unsigned) clusternums[0] > (unsigned) portalclusters || (unsigned) clusternums[1] > (unsigned) portalclusters) Sys_Error ("LoadPortals: reading portal %u", i); @@ -1413,8 +1392,10 @@ main (int argc, char **argv) { double start, stop; QFile *f; + size_t main_memsize = 1024 * 1024 * 1024; main_thread.memsuper = new_memsuper (); + main_thread.hunk = Hunk_Init (Sys_Alloc (main_memsize), main_memsize); start = Sys_DoubleTime ();