[qfvis] Use hunk to manage winding memory

It turns out cmem is not so good for many large allocations (probably a
bug in handling the blocks), but it was really meant for lots of little
churning allocations anyway. After an analysis of winding lifetimes, it
became clear that the hunk allocator would work very well. The base
windings are allocated from a global hunk (currently 1GB, plenty for
even ad_tears), and ephemeral windings are allocated from a per-thread
hunk of 1MB (seems to be way more than enough: gmsp3v2 uses a maximum of
only 56064 bytes, and ad_tears got through 30% before I gave up on it).
Any speed difference (for gmsp3v2) seems to be lost in the noise: it still
completes in 38.4s on my machine.
This commit is contained in:
Bill Currie 2021-07-29 11:49:18 +09:00
parent 8f376a48f8
commit 72a1fef714
3 changed files with 26 additions and 49 deletions

View file

@ -76,6 +76,7 @@ extern pthread_rwlock_t *stats_lock;
#include "QF/cmem.h" #include "QF/cmem.h"
#include "QF/dstring.h" #include "QF/dstring.h"
#include "QF/set.h" #include "QF/set.h"
#include "QF/zone.h"
#include "QF/simd/vec4f.h" #include "QF/simd/vec4f.h"
#define MAX_PORTALS 32768 #define MAX_PORTALS 32768
@ -92,7 +93,7 @@ typedef struct winding_s {
unsigned numpoints; unsigned numpoints;
int id; int id;
int thread; int thread;
vec4f_t points[MAX_PORTALS_ON_CLUSTER]; // variable sized vec4f_t points[1]; // variable sized
} winding_t; } winding_t;
typedef enum { typedef enum {
@ -161,9 +162,7 @@ typedef struct {
unsigned sep_free; ///< how many separators were freed unsigned sep_free; ///< how many separators were freed
unsigned sep_highwater; ///< most separators in flight unsigned sep_highwater; ///< most separators in flight
unsigned sep_maxbulk; ///< most separators freed at once unsigned sep_maxbulk; ///< most separators freed at once
unsigned winding_alloc; ///< how many windings were allocated size_t winding_mark; ///< most memory allocated to windings
unsigned winding_free; ///< how many windings were freed
unsigned winding_highwater; ///< most windings in flight
unsigned stack_alloc; ///< how many stack blocks were allocated unsigned stack_alloc; ///< how many stack blocks were allocated
unsigned stack_free; ///< how many stack blocks were freed unsigned stack_free; ///< how many stack blocks were freed
} visstat_t; } visstat_t;
@ -176,6 +175,7 @@ typedef struct threaddata_s {
sep_t *sep_freelist; ///< per-thread list of free separators sep_t *sep_freelist; ///< per-thread list of free separators
winding_t *winding_freelist; ///< per-thread list of free windings winding_t *winding_freelist; ///< per-thread list of free windings
memsuper_t *memsuper; ///< per-thread memory pool memsuper_t *memsuper; ///< per-thread memory pool
memhunk_t *hunk;
dstring_t *str; dstring_t *str;
set_pool_t set_pool; set_pool_t set_pool;
int id; int id;
@ -203,7 +203,6 @@ extern cluster_t *clusters;
extern int *leafcluster; extern int *leafcluster;
extern byte *uncompressed; extern byte *uncompressed;
void FreeWinding (threaddata_t *thread, winding_t *w);
winding_t *NewWinding (threaddata_t *thread, int points); winding_t *NewWinding (threaddata_t *thread, int points);
winding_t *ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split, winding_t *ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split,
qboolean keepon); qboolean keepon);

View file

@ -345,13 +345,18 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
pass_winding = prevstack->pass_winding; pass_winding = prevstack->pass_winding;
pass_plane = prevstack->pass_plane; pass_plane = prevstack->pass_plane;
size_t winding_mark = Hunk_LowMark (thread->hunk);
// check all portals for flowing into other clusters // check all portals for flowing into other clusters
for (i = 0; i < cluster->numportals; i++) { for (i = 0; i < cluster->numportals; i++) {
target_portal = cluster->portals[i]; target_portal = cluster->portals[i];
if (!set_is_member (prevstack->mightsee, target_portal->cluster)) if (!set_is_member (prevstack->mightsee, target_portal->cluster))
continue; // can't possibly see it continue; // can't possibly see it
thread->stats.winding_mark = max (thread->stats.winding_mark,
Hunk_LowMark (thread->hunk));
Hunk_RawFreeToLowMark (thread->hunk, winding_mark);
// if target_portal can't see anything we haven't already seen, skip it // if target_portal can't see anything we haven't already seen, skip it
test = select_test_set (target_portal, thread); test = select_test_set (target_portal, thread);
if (!mightsee_more (might, prevstack->mightsee, test, vis)) { if (!mightsee_more (might, prevstack->mightsee, test, vis)) {
@ -385,7 +390,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
stack->pass_portal = target_portal; stack->pass_portal = target_portal;
RecursiveClusterFlow (target_portal->cluster, thread, stack); RecursiveClusterFlow (target_portal->cluster, thread, stack);
FreeWinding (thread, target_winding);
continue; continue;
} }
@ -399,7 +403,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
source_winding = ClipWinding (thread, source_winding, backplane, false); source_winding = ClipWinding (thread, source_winding, backplane, false);
if (!source_winding) { if (!source_winding) {
FreeWinding (thread, target_winding);
continue; continue;
} }
@ -418,7 +421,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
target_winding); target_winding);
if (!target_winding) { if (!target_winding) {
thread->stats.targetclipped++; thread->stats.targetclipped++;
FreeWinding (thread, source_winding);
continue; continue;
} }
if (target_winding != old) if (target_winding != old)
@ -437,7 +439,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
target_winding); target_winding);
if (!target_winding) { if (!target_winding) {
thread->stats.targetclipped++; thread->stats.targetclipped++;
FreeWinding (thread, source_winding);
continue; continue;
} }
if (target_winding != old) if (target_winding != old)
@ -455,7 +456,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
free_separators (thread, sep); free_separators (thread, sep);
if (!source_winding) { if (!source_winding) {
thread->stats.sourceclipped++; thread->stats.sourceclipped++;
FreeWinding (thread, target_winding);
continue; continue;
} }
if (source_winding != old) if (source_winding != old)
@ -471,7 +471,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
free_separators (thread, sep); free_separators (thread, sep);
if (!source_winding) { if (!source_winding) {
thread->stats.sourceclipped++; thread->stats.sourceclipped++;
FreeWinding (thread, target_winding);
continue; continue;
} }
if (source_winding != old) if (source_winding != old)
@ -488,10 +487,8 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
// flow through it for real // flow through it for real
RecursiveClusterFlow (target_portal->cluster, thread, stack); RecursiveClusterFlow (target_portal->cluster, thread, stack);
FreeWinding (thread, source_winding);
FreeWinding (thread, target_winding);
} }
Hunk_RawFreeToLowMark (thread->hunk, winding_mark);
free_separators (thread, stack->separators[1]); free_separators (thread, stack->separators[1]);
free_separators (thread, stack->separators[0]); free_separators (thread, stack->separators[0]);
} }

View file

@ -156,32 +156,15 @@ NewWinding (threaddata_t *thread, int points)
winding_t *winding; winding_t *winding;
unsigned size; unsigned size;
if (points > MAX_POINTS_ON_WINDING)
Sys_Error ("NewWinding: %i points", points);
size = field_offset (winding_t, points[points]); size = field_offset (winding_t, points[points]);
winding = CMEMALLOC (13, winding_t, thread->winding, thread->memsuper); winding = Hunk_RawAlloc (thread->hunk, size);
memset (winding, 0, size); memset (winding, 0, size);
thread->stats.winding_alloc++;
winding->id = thread->winding_id++; winding->id = thread->winding_id++;
winding->thread = thread->id; winding->thread = thread->id;
return winding; return winding;
} }
void
FreeWinding (threaddata_t *thread, winding_t *w)
{
if (!w->original) {
unsigned count = thread->stats.winding_alloc - thread->stats.winding_free;
if (count > thread->stats.winding_highwater) {
thread->stats.winding_highwater = count;
}
thread->stats.winding_free++;
CMEMFREE (thread->winding, w);
}
}
winding_t * winding_t *
CopyWinding (threaddata_t *thread, const winding_t *w) CopyWinding (threaddata_t *thread, const winding_t *w)
{ {
@ -189,10 +172,9 @@ CopyWinding (threaddata_t *thread, const winding_t *w)
winding_t *copy; winding_t *copy;
size = field_offset (winding_t, points[w->numpoints]); size = field_offset (winding_t, points[w->numpoints]);
copy = CMEMALLOC (13, winding_t, thread->winding, thread->memsuper); copy = Hunk_RawAlloc (thread->hunk, size);
memcpy (copy, w, size); memcpy (copy, w, size);
copy->original = false; copy->original = false;
thread->stats.winding_alloc++;
copy->id = thread->winding_id++; copy->id = thread->winding_id++;
copy->thread = thread->id; copy->thread = thread->id;
return copy; return copy;
@ -362,7 +344,6 @@ ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split,
return in; return in;
} }
if (!counts[SIDE_FRONT]) { if (!counts[SIDE_FRONT]) {
FreeWinding (thread, in);
return NULL; return NULL;
} }
if (!counts[SIDE_BACK]) { if (!counts[SIDE_BACK]) {
@ -397,8 +378,6 @@ ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split,
Sys_Error ("ClipWinding: points exceeded estimate: n:%u m:%u", Sys_Error ("ClipWinding: points exceeded estimate: n:%u m:%u",
neww->numpoints, maxpts); neww->numpoints, maxpts);
} }
// free the original winding
FreeWinding (thread, in);
return neww; return neww;
} }
@ -461,8 +440,8 @@ UpdateStats (threaddata_t *thread)
global_stats.mightseeupdate += thread->stats.mightseeupdate; global_stats.mightseeupdate += thread->stats.mightseeupdate;
global_stats.sep_alloc += thread->stats.sep_alloc; global_stats.sep_alloc += thread->stats.sep_alloc;
global_stats.sep_free += thread->stats.sep_free; global_stats.sep_free += thread->stats.sep_free;
global_stats.winding_alloc += thread->stats.winding_alloc; global_stats.winding_mark = max (global_stats.winding_mark,
global_stats.winding_free += thread->stats.winding_free; thread->stats.winding_mark);
global_stats.stack_alloc += thread->stats.stack_alloc; global_stats.stack_alloc += thread->stats.stack_alloc;
global_stats.stack_free += thread->stats.stack_free; global_stats.stack_free += thread->stats.stack_free;
UNLOCK (stats_lock); UNLOCK (stats_lock);
@ -569,8 +548,10 @@ LeafThread (void *_thread)
int thread = (int) (intptr_t) _thread; int thread = (int) (intptr_t) _thread;
threaddata_t data; threaddata_t data;
int count = 0; int count = 0;
size_t thread_memsize = 1024 * 1024;
memset (&data, 0, sizeof (data)); memset (&data, 0, sizeof (data));
data.hunk = Hunk_Init (Sys_Alloc (thread_memsize), thread_memsize);
set_pool_init (&data.set_pool); set_pool_init (&data.set_pool);
data.id = thread; data.id = thread;
data.memsuper = new_memsuper (); data.memsuper = new_memsuper ();
@ -588,13 +569,13 @@ LeafThread (void *_thread)
PortalFlow (&data, portal); PortalFlow (&data, portal);
int whw = data.stats.winding_highwater; int whm = data.stats.winding_mark;
int shw = data.stats.sep_highwater; int shw = data.stats.sep_highwater;
int smb = data.stats.sep_maxbulk; int smb = data.stats.sep_maxbulk;
PortalCompleted (&data, portal); PortalCompleted (&data, portal);
data.stats.sep_highwater = shw; data.stats.sep_highwater = shw;
data.stats.sep_maxbulk = smb; data.stats.sep_maxbulk = smb;
data.stats.winding_highwater = whw; data.stats.winding_mark = whm;
if (options.verbosity >= 4) if (options.verbosity >= 4)
printf ("portal:%5i mightsee:%5i cansee:%5i %5u/%u\n", printf ("portal:%5i mightsee:%5i cansee:%5i %5u/%u\n",
@ -605,8 +586,8 @@ LeafThread (void *_thread)
} while (1); } while (1);
if (options.verbosity >= 2) { if (options.verbosity >= 2) {
printf ("thread %d winding highwater: %d\n", thread, printf ("thread %d winding mark: %zd\n", thread,
data.stats.winding_highwater); data.stats.winding_mark);
printf ("thread %d separator highwater: %d\n", thread, printf ("thread %d separator highwater: %d\n", thread,
data.stats.sep_highwater); data.stats.sep_highwater);
printf ("thread %d separator maxbulk: %d\n", thread, printf ("thread %d separator maxbulk: %d\n", thread,
@ -619,6 +600,8 @@ LeafThread (void *_thread)
working[thread] = -1; working[thread] = -1;
delete_memsuper (data.memsuper); delete_memsuper (data.memsuper);
dstring_delete (data.str); dstring_delete (data.str);
Sys_Free (data.hunk, thread_memsize);
return NULL; return NULL;
} }
@ -965,9 +948,7 @@ CalcPortalVis (void)
printf ("separators allocated: %u freed: %u %u\n", printf ("separators allocated: %u freed: %u %u\n",
global_stats.sep_alloc, global_stats.sep_free, global_stats.sep_alloc, global_stats.sep_free,
global_stats.sep_alloc - global_stats.sep_free); global_stats.sep_alloc - global_stats.sep_free);
printf ("windings allocated: %u freed: %u %u\n", printf ("max windings mark: %zd\n", global_stats.winding_mark);
global_stats.winding_alloc, global_stats.winding_free,
global_stats.winding_alloc - global_stats.winding_free);
printf ("stack blocks allocated: %u freed: %u %u\n", printf ("stack blocks allocated: %u freed: %u %u\n",
global_stats.stack_alloc, global_stats.stack_free, global_stats.stack_alloc, global_stats.stack_free,
global_stats.stack_alloc - global_stats.stack_free); global_stats.stack_alloc - global_stats.stack_free);
@ -1283,8 +1264,6 @@ LoadPortals (char *name)
line = err; line = err;
} }
if (numpoints > MAX_POINTS_ON_WINDING)
Sys_Error ("LoadPortals: portal %u has too many points", i);
if ((unsigned) clusternums[0] > (unsigned) portalclusters if ((unsigned) clusternums[0] > (unsigned) portalclusters
|| (unsigned) clusternums[1] > (unsigned) portalclusters) || (unsigned) clusternums[1] > (unsigned) portalclusters)
Sys_Error ("LoadPortals: reading portal %u", i); Sys_Error ("LoadPortals: reading portal %u", i);
@ -1413,8 +1392,10 @@ main (int argc, char **argv)
{ {
double start, stop; double start, stop;
QFile *f; QFile *f;
size_t main_memsize = 1024 * 1024 * 1024;
main_thread.memsuper = new_memsuper (); main_thread.memsuper = new_memsuper ();
main_thread.hunk = Hunk_Init (Sys_Alloc (main_memsize), main_memsize);
start = Sys_DoubleTime (); start = Sys_DoubleTime ();