[qfvis] Use hunk to manage winding memory

It turns out cmem is not so good for many large allocations (probably a
bug in handling the blocks), but it was really meant for lots of little
churning allocations anyway. After an analysis of winding lifetimes, it
became clear that the hunk allocator would work very well. The base
windings are allocated from a global hunk (currently 1GB, plenty for
even ad_tears), and ephemeral windings are allocated from a per-thread
hunk of 1MB (seems to be way more than enough: gmsp3v2 uses a maximum of
only 56064 bytes, and ad_tears got through 30% before I gave up on it).
Any speed difference (for gmsp3v2) seems to be lost in the noise: still
completing in 38.4s on my machine.
This commit is contained in:
Bill Currie 2021-07-29 11:49:18 +09:00
parent 8f376a48f8
commit 72a1fef714
3 changed files with 26 additions and 49 deletions

View file

@ -76,6 +76,7 @@ extern pthread_rwlock_t *stats_lock;
#include "QF/cmem.h"
#include "QF/dstring.h"
#include "QF/set.h"
#include "QF/zone.h"
#include "QF/simd/vec4f.h"
#define MAX_PORTALS 32768
@ -92,7 +93,7 @@ typedef struct winding_s {
unsigned numpoints;
int id;
int thread;
vec4f_t points[MAX_PORTALS_ON_CLUSTER]; // variable sized
vec4f_t points[1]; // variable sized
} winding_t;
typedef enum {
@ -161,9 +162,7 @@ typedef struct {
unsigned sep_free; ///< how many separators were freed
unsigned sep_highwater; ///< most separators in flight
unsigned sep_maxbulk; ///< most separators freed at once
unsigned winding_alloc; ///< how many windings were allocated
unsigned winding_free; ///< how many windings were freed
unsigned winding_highwater; ///< most windings in flight
size_t winding_mark; ///< most memory allocated to windings
unsigned stack_alloc; ///< how many stack blocks were allocated
unsigned stack_free; ///< how many stack blocks were freed
} visstat_t;
@ -176,6 +175,7 @@ typedef struct threaddata_s {
sep_t *sep_freelist; ///< per-thread list of free separators
winding_t *winding_freelist; ///< per-thread list of free windings
memsuper_t *memsuper; ///< per-thread memory pool
memhunk_t *hunk;
dstring_t *str;
set_pool_t set_pool;
int id;
@ -203,7 +203,6 @@ extern cluster_t *clusters;
extern int *leafcluster;
extern byte *uncompressed;
void FreeWinding (threaddata_t *thread, winding_t *w);
winding_t *NewWinding (threaddata_t *thread, int points);
winding_t *ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split,
qboolean keepon);

View file

@ -345,13 +345,18 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
pass_winding = prevstack->pass_winding;
pass_plane = prevstack->pass_plane;
size_t winding_mark = Hunk_LowMark (thread->hunk);
// check all portals for flowing into other clusters
for (i = 0; i < cluster->numportals; i++) {
target_portal = cluster->portals[i];
if (!set_is_member (prevstack->mightsee, target_portal->cluster))
continue; // can't possibly see it
thread->stats.winding_mark = max (thread->stats.winding_mark,
Hunk_LowMark (thread->hunk));
Hunk_RawFreeToLowMark (thread->hunk, winding_mark);
// if target_portal can't see anything we haven't already seen, skip it
test = select_test_set (target_portal, thread);
if (!mightsee_more (might, prevstack->mightsee, test, vis)) {
@ -385,7 +390,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
stack->pass_portal = target_portal;
RecursiveClusterFlow (target_portal->cluster, thread, stack);
FreeWinding (thread, target_winding);
continue;
}
@ -399,7 +403,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
source_winding = ClipWinding (thread, source_winding, backplane, false);
if (!source_winding) {
FreeWinding (thread, target_winding);
continue;
}
@ -418,7 +421,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
target_winding);
if (!target_winding) {
thread->stats.targetclipped++;
FreeWinding (thread, source_winding);
continue;
}
if (target_winding != old)
@ -437,7 +439,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
target_winding);
if (!target_winding) {
thread->stats.targetclipped++;
FreeWinding (thread, source_winding);
continue;
}
if (target_winding != old)
@ -455,7 +456,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
free_separators (thread, sep);
if (!source_winding) {
thread->stats.sourceclipped++;
FreeWinding (thread, target_winding);
continue;
}
if (source_winding != old)
@ -471,7 +471,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
free_separators (thread, sep);
if (!source_winding) {
thread->stats.sourceclipped++;
FreeWinding (thread, target_winding);
continue;
}
if (source_winding != old)
@ -488,10 +487,8 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
// flow through it for real
RecursiveClusterFlow (target_portal->cluster, thread, stack);
FreeWinding (thread, source_winding);
FreeWinding (thread, target_winding);
}
Hunk_RawFreeToLowMark (thread->hunk, winding_mark);
free_separators (thread, stack->separators[1]);
free_separators (thread, stack->separators[0]);
}

View file

@ -156,32 +156,15 @@ NewWinding (threaddata_t *thread, int points)
winding_t *winding;
unsigned size;
if (points > MAX_POINTS_ON_WINDING)
Sys_Error ("NewWinding: %i points", points);
size = field_offset (winding_t, points[points]);
winding = CMEMALLOC (13, winding_t, thread->winding, thread->memsuper);
winding = Hunk_RawAlloc (thread->hunk, size);
memset (winding, 0, size);
thread->stats.winding_alloc++;
winding->id = thread->winding_id++;
winding->thread = thread->id;
return winding;
}
/*
 * Release a winding on behalf of a thread, updating that thread's winding
 * statistics along the way.
 *
 * thread  per-thread data whose stats counters and winding pool are used
 * w       winding to release; nothing happens when w->original is set
 */
void
FreeWinding (threaddata_t *thread, winding_t *w)
{
// windings flagged as original are never released or counted here
if (!w->original) {
// windings currently in flight: allocated so far minus freed so far
unsigned count = thread->stats.winding_alloc - thread->stats.winding_free;
// sample the peak before this winding is counted as freed
if (count > thread->stats.winding_highwater) {
thread->stats.winding_highwater = count;
}
thread->stats.winding_free++;
// NOTE(review): presumably hands w back to the thread->winding cmem
// free list -- CMEMFREE is defined outside this view, confirm
CMEMFREE (thread->winding, w);
}
}
winding_t *
CopyWinding (threaddata_t *thread, const winding_t *w)
{
@ -189,10 +172,9 @@ CopyWinding (threaddata_t *thread, const winding_t *w)
winding_t *copy;
size = field_offset (winding_t, points[w->numpoints]);
copy = CMEMALLOC (13, winding_t, thread->winding, thread->memsuper);
copy = Hunk_RawAlloc (thread->hunk, size);
memcpy (copy, w, size);
copy->original = false;
thread->stats.winding_alloc++;
copy->id = thread->winding_id++;
copy->thread = thread->id;
return copy;
@ -362,7 +344,6 @@ ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split,
return in;
}
if (!counts[SIDE_FRONT]) {
FreeWinding (thread, in);
return NULL;
}
if (!counts[SIDE_BACK]) {
@ -397,8 +378,6 @@ ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split,
Sys_Error ("ClipWinding: points exceeded estimate: n:%u m:%u",
neww->numpoints, maxpts);
}
// free the original winding
FreeWinding (thread, in);
return neww;
}
@ -461,8 +440,8 @@ UpdateStats (threaddata_t *thread)
global_stats.mightseeupdate += thread->stats.mightseeupdate;
global_stats.sep_alloc += thread->stats.sep_alloc;
global_stats.sep_free += thread->stats.sep_free;
global_stats.winding_alloc += thread->stats.winding_alloc;
global_stats.winding_free += thread->stats.winding_free;
global_stats.winding_mark = max (global_stats.winding_mark,
thread->stats.winding_mark);
global_stats.stack_alloc += thread->stats.stack_alloc;
global_stats.stack_free += thread->stats.stack_free;
UNLOCK (stats_lock);
@ -569,8 +548,10 @@ LeafThread (void *_thread)
int thread = (int) (intptr_t) _thread;
threaddata_t data;
int count = 0;
size_t thread_memsize = 1024 * 1024;
memset (&data, 0, sizeof (data));
data.hunk = Hunk_Init (Sys_Alloc (thread_memsize), thread_memsize);
set_pool_init (&data.set_pool);
data.id = thread;
data.memsuper = new_memsuper ();
@ -588,13 +569,13 @@ LeafThread (void *_thread)
PortalFlow (&data, portal);
int whw = data.stats.winding_highwater;
int whm = data.stats.winding_mark;
int shw = data.stats.sep_highwater;
int smb = data.stats.sep_maxbulk;
PortalCompleted (&data, portal);
data.stats.sep_highwater = shw;
data.stats.sep_maxbulk = smb;
data.stats.winding_highwater = whw;
data.stats.winding_mark = whm;
if (options.verbosity >= 4)
printf ("portal:%5i mightsee:%5i cansee:%5i %5u/%u\n",
@ -605,8 +586,8 @@ LeafThread (void *_thread)
} while (1);
if (options.verbosity >= 2) {
printf ("thread %d winding highwater: %d\n", thread,
data.stats.winding_highwater);
printf ("thread %d winding mark: %zd\n", thread,
data.stats.winding_mark);
printf ("thread %d separator highwater: %d\n", thread,
data.stats.sep_highwater);
printf ("thread %d separator maxbulk: %d\n", thread,
@ -619,6 +600,8 @@ LeafThread (void *_thread)
working[thread] = -1;
delete_memsuper (data.memsuper);
dstring_delete (data.str);
Sys_Free (data.hunk, thread_memsize);
return NULL;
}
@ -965,9 +948,7 @@ CalcPortalVis (void)
printf ("separators allocated: %u freed: %u %u\n",
global_stats.sep_alloc, global_stats.sep_free,
global_stats.sep_alloc - global_stats.sep_free);
printf ("windings allocated: %u freed: %u %u\n",
global_stats.winding_alloc, global_stats.winding_free,
global_stats.winding_alloc - global_stats.winding_free);
printf ("max windings mark: %zd\n", global_stats.winding_mark);
printf ("stack blocks allocated: %u freed: %u %u\n",
global_stats.stack_alloc, global_stats.stack_free,
global_stats.stack_alloc - global_stats.stack_free);
@ -1283,8 +1264,6 @@ LoadPortals (char *name)
line = err;
}
if (numpoints > MAX_POINTS_ON_WINDING)
Sys_Error ("LoadPortals: portal %u has too many points", i);
if ((unsigned) clusternums[0] > (unsigned) portalclusters
|| (unsigned) clusternums[1] > (unsigned) portalclusters)
Sys_Error ("LoadPortals: reading portal %u", i);
@ -1413,8 +1392,10 @@ main (int argc, char **argv)
{
double start, stop;
QFile *f;
size_t main_memsize = 1024 * 1024 * 1024;
main_thread.memsuper = new_memsuper ();
main_thread.hunk = Hunk_Init (Sys_Alloc (main_memsize), main_memsize);
start = Sys_DoubleTime ();