mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2025-02-16 17:01:53 +00:00
[qfvis] Use hunk to manage winding memory
It turns out cmem does not cope well with many large allocations (probably a bug in its handling of the blocks), but it was really meant for lots of little churning allocations anyway. After an analysis of winding lifetimes, it became clear that the hunk allocator would work very well. The base windings are allocated from a global hunk (currently 1GB, plenty for even ad_tears), and ephemeral windings are allocated from a per-thread hunk of 1MB. That seems to be far more than enough: gmsp3v2 uses a maximum of only 56064 bytes, and ad_tears got through 30% before I gave up on it. Any speed difference (for gmsp3v2) seems to be lost in the noise: it still completes in 38.4s on my machine.
This commit is contained in:
parent
8f376a48f8
commit
72a1fef714
3 changed files with 26 additions and 49 deletions
|
@ -76,6 +76,7 @@ extern pthread_rwlock_t *stats_lock;
|
||||||
#include "QF/cmem.h"
|
#include "QF/cmem.h"
|
||||||
#include "QF/dstring.h"
|
#include "QF/dstring.h"
|
||||||
#include "QF/set.h"
|
#include "QF/set.h"
|
||||||
|
#include "QF/zone.h"
|
||||||
#include "QF/simd/vec4f.h"
|
#include "QF/simd/vec4f.h"
|
||||||
|
|
||||||
#define MAX_PORTALS 32768
|
#define MAX_PORTALS 32768
|
||||||
|
@ -92,7 +93,7 @@ typedef struct winding_s {
|
||||||
unsigned numpoints;
|
unsigned numpoints;
|
||||||
int id;
|
int id;
|
||||||
int thread;
|
int thread;
|
||||||
vec4f_t points[MAX_PORTALS_ON_CLUSTER]; // variable sized
|
vec4f_t points[1]; // variable sized
|
||||||
} winding_t;
|
} winding_t;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
@ -161,9 +162,7 @@ typedef struct {
|
||||||
unsigned sep_free; ///< how many separators were freed
|
unsigned sep_free; ///< how many separators were freed
|
||||||
unsigned sep_highwater; ///< most separators in flight
|
unsigned sep_highwater; ///< most separators in flight
|
||||||
unsigned sep_maxbulk; ///< most separators freed at once
|
unsigned sep_maxbulk; ///< most separators freed at once
|
||||||
unsigned winding_alloc; ///< how many windings were allocated
|
size_t winding_mark; ///< most memory allocated to windings
|
||||||
unsigned winding_free; ///< how many windings were freed
|
|
||||||
unsigned winding_highwater; ///< most windings in flight
|
|
||||||
unsigned stack_alloc; ///< how many stack blocks were allocated
|
unsigned stack_alloc; ///< how many stack blocks were allocated
|
||||||
unsigned stack_free; ///< how many stack blocks were freed
|
unsigned stack_free; ///< how many stack blocks were freed
|
||||||
} visstat_t;
|
} visstat_t;
|
||||||
|
@ -176,6 +175,7 @@ typedef struct threaddata_s {
|
||||||
sep_t *sep_freelist; ///< per-thread list of free separators
|
sep_t *sep_freelist; ///< per-thread list of free separators
|
||||||
winding_t *winding_freelist; ///< per-thread list of free windings
|
winding_t *winding_freelist; ///< per-thread list of free windings
|
||||||
memsuper_t *memsuper; ///< per-thread memory pool
|
memsuper_t *memsuper; ///< per-thread memory pool
|
||||||
|
memhunk_t *hunk;
|
||||||
dstring_t *str;
|
dstring_t *str;
|
||||||
set_pool_t set_pool;
|
set_pool_t set_pool;
|
||||||
int id;
|
int id;
|
||||||
|
@ -203,7 +203,6 @@ extern cluster_t *clusters;
|
||||||
extern int *leafcluster;
|
extern int *leafcluster;
|
||||||
extern byte *uncompressed;
|
extern byte *uncompressed;
|
||||||
|
|
||||||
void FreeWinding (threaddata_t *thread, winding_t *w);
|
|
||||||
winding_t *NewWinding (threaddata_t *thread, int points);
|
winding_t *NewWinding (threaddata_t *thread, int points);
|
||||||
winding_t *ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split,
|
winding_t *ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split,
|
||||||
qboolean keepon);
|
qboolean keepon);
|
||||||
|
|
|
@ -345,13 +345,18 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
|
||||||
pass_winding = prevstack->pass_winding;
|
pass_winding = prevstack->pass_winding;
|
||||||
pass_plane = prevstack->pass_plane;
|
pass_plane = prevstack->pass_plane;
|
||||||
|
|
||||||
|
size_t winding_mark = Hunk_LowMark (thread->hunk);
|
||||||
|
|
||||||
// check all portals for flowing into other clusters
|
// check all portals for flowing into other clusters
|
||||||
for (i = 0; i < cluster->numportals; i++) {
|
for (i = 0; i < cluster->numportals; i++) {
|
||||||
target_portal = cluster->portals[i];
|
target_portal = cluster->portals[i];
|
||||||
|
|
||||||
if (!set_is_member (prevstack->mightsee, target_portal->cluster))
|
if (!set_is_member (prevstack->mightsee, target_portal->cluster))
|
||||||
continue; // can't possibly see it
|
continue; // can't possibly see it
|
||||||
|
|
||||||
|
thread->stats.winding_mark = max (thread->stats.winding_mark,
|
||||||
|
Hunk_LowMark (thread->hunk));
|
||||||
|
Hunk_RawFreeToLowMark (thread->hunk, winding_mark);
|
||||||
|
|
||||||
// if target_portal can't see anything we haven't already seen, skip it
|
// if target_portal can't see anything we haven't already seen, skip it
|
||||||
test = select_test_set (target_portal, thread);
|
test = select_test_set (target_portal, thread);
|
||||||
if (!mightsee_more (might, prevstack->mightsee, test, vis)) {
|
if (!mightsee_more (might, prevstack->mightsee, test, vis)) {
|
||||||
|
@ -385,7 +390,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
|
||||||
stack->pass_portal = target_portal;
|
stack->pass_portal = target_portal;
|
||||||
|
|
||||||
RecursiveClusterFlow (target_portal->cluster, thread, stack);
|
RecursiveClusterFlow (target_portal->cluster, thread, stack);
|
||||||
FreeWinding (thread, target_winding);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -399,7 +403,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
|
||||||
|
|
||||||
source_winding = ClipWinding (thread, source_winding, backplane, false);
|
source_winding = ClipWinding (thread, source_winding, backplane, false);
|
||||||
if (!source_winding) {
|
if (!source_winding) {
|
||||||
FreeWinding (thread, target_winding);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -418,7 +421,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
|
||||||
target_winding);
|
target_winding);
|
||||||
if (!target_winding) {
|
if (!target_winding) {
|
||||||
thread->stats.targetclipped++;
|
thread->stats.targetclipped++;
|
||||||
FreeWinding (thread, source_winding);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (target_winding != old)
|
if (target_winding != old)
|
||||||
|
@ -437,7 +439,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
|
||||||
target_winding);
|
target_winding);
|
||||||
if (!target_winding) {
|
if (!target_winding) {
|
||||||
thread->stats.targetclipped++;
|
thread->stats.targetclipped++;
|
||||||
FreeWinding (thread, source_winding);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (target_winding != old)
|
if (target_winding != old)
|
||||||
|
@ -455,7 +456,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
|
||||||
free_separators (thread, sep);
|
free_separators (thread, sep);
|
||||||
if (!source_winding) {
|
if (!source_winding) {
|
||||||
thread->stats.sourceclipped++;
|
thread->stats.sourceclipped++;
|
||||||
FreeWinding (thread, target_winding);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (source_winding != old)
|
if (source_winding != old)
|
||||||
|
@ -471,7 +471,6 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
|
||||||
free_separators (thread, sep);
|
free_separators (thread, sep);
|
||||||
if (!source_winding) {
|
if (!source_winding) {
|
||||||
thread->stats.sourceclipped++;
|
thread->stats.sourceclipped++;
|
||||||
FreeWinding (thread, target_winding);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (source_winding != old)
|
if (source_winding != old)
|
||||||
|
@ -488,10 +487,8 @@ RecursiveClusterFlow (int clusternum, threaddata_t *thread, pstack_t *prevstack)
|
||||||
|
|
||||||
// flow through it for real
|
// flow through it for real
|
||||||
RecursiveClusterFlow (target_portal->cluster, thread, stack);
|
RecursiveClusterFlow (target_portal->cluster, thread, stack);
|
||||||
|
|
||||||
FreeWinding (thread, source_winding);
|
|
||||||
FreeWinding (thread, target_winding);
|
|
||||||
}
|
}
|
||||||
|
Hunk_RawFreeToLowMark (thread->hunk, winding_mark);
|
||||||
free_separators (thread, stack->separators[1]);
|
free_separators (thread, stack->separators[1]);
|
||||||
free_separators (thread, stack->separators[0]);
|
free_separators (thread, stack->separators[0]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -156,32 +156,15 @@ NewWinding (threaddata_t *thread, int points)
|
||||||
winding_t *winding;
|
winding_t *winding;
|
||||||
unsigned size;
|
unsigned size;
|
||||||
|
|
||||||
if (points > MAX_POINTS_ON_WINDING)
|
|
||||||
Sys_Error ("NewWinding: %i points", points);
|
|
||||||
|
|
||||||
size = field_offset (winding_t, points[points]);
|
size = field_offset (winding_t, points[points]);
|
||||||
winding = CMEMALLOC (13, winding_t, thread->winding, thread->memsuper);
|
winding = Hunk_RawAlloc (thread->hunk, size);
|
||||||
memset (winding, 0, size);
|
memset (winding, 0, size);
|
||||||
thread->stats.winding_alloc++;
|
|
||||||
winding->id = thread->winding_id++;
|
winding->id = thread->winding_id++;
|
||||||
winding->thread = thread->id;
|
winding->thread = thread->id;
|
||||||
|
|
||||||
return winding;
|
return winding;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
FreeWinding (threaddata_t *thread, winding_t *w)
|
|
||||||
{
|
|
||||||
if (!w->original) {
|
|
||||||
unsigned count = thread->stats.winding_alloc - thread->stats.winding_free;
|
|
||||||
if (count > thread->stats.winding_highwater) {
|
|
||||||
thread->stats.winding_highwater = count;
|
|
||||||
}
|
|
||||||
thread->stats.winding_free++;
|
|
||||||
CMEMFREE (thread->winding, w);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
winding_t *
|
winding_t *
|
||||||
CopyWinding (threaddata_t *thread, const winding_t *w)
|
CopyWinding (threaddata_t *thread, const winding_t *w)
|
||||||
{
|
{
|
||||||
|
@ -189,10 +172,9 @@ CopyWinding (threaddata_t *thread, const winding_t *w)
|
||||||
winding_t *copy;
|
winding_t *copy;
|
||||||
|
|
||||||
size = field_offset (winding_t, points[w->numpoints]);
|
size = field_offset (winding_t, points[w->numpoints]);
|
||||||
copy = CMEMALLOC (13, winding_t, thread->winding, thread->memsuper);
|
copy = Hunk_RawAlloc (thread->hunk, size);
|
||||||
memcpy (copy, w, size);
|
memcpy (copy, w, size);
|
||||||
copy->original = false;
|
copy->original = false;
|
||||||
thread->stats.winding_alloc++;
|
|
||||||
copy->id = thread->winding_id++;
|
copy->id = thread->winding_id++;
|
||||||
copy->thread = thread->id;
|
copy->thread = thread->id;
|
||||||
return copy;
|
return copy;
|
||||||
|
@ -362,7 +344,6 @@ ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split,
|
||||||
return in;
|
return in;
|
||||||
}
|
}
|
||||||
if (!counts[SIDE_FRONT]) {
|
if (!counts[SIDE_FRONT]) {
|
||||||
FreeWinding (thread, in);
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (!counts[SIDE_BACK]) {
|
if (!counts[SIDE_BACK]) {
|
||||||
|
@ -397,8 +378,6 @@ ClipWinding (threaddata_t *thread, winding_t *in, vec4f_t split,
|
||||||
Sys_Error ("ClipWinding: points exceeded estimate: n:%u m:%u",
|
Sys_Error ("ClipWinding: points exceeded estimate: n:%u m:%u",
|
||||||
neww->numpoints, maxpts);
|
neww->numpoints, maxpts);
|
||||||
}
|
}
|
||||||
// free the original winding
|
|
||||||
FreeWinding (thread, in);
|
|
||||||
|
|
||||||
return neww;
|
return neww;
|
||||||
}
|
}
|
||||||
|
@ -461,8 +440,8 @@ UpdateStats (threaddata_t *thread)
|
||||||
global_stats.mightseeupdate += thread->stats.mightseeupdate;
|
global_stats.mightseeupdate += thread->stats.mightseeupdate;
|
||||||
global_stats.sep_alloc += thread->stats.sep_alloc;
|
global_stats.sep_alloc += thread->stats.sep_alloc;
|
||||||
global_stats.sep_free += thread->stats.sep_free;
|
global_stats.sep_free += thread->stats.sep_free;
|
||||||
global_stats.winding_alloc += thread->stats.winding_alloc;
|
global_stats.winding_mark = max (global_stats.winding_mark,
|
||||||
global_stats.winding_free += thread->stats.winding_free;
|
thread->stats.winding_mark);
|
||||||
global_stats.stack_alloc += thread->stats.stack_alloc;
|
global_stats.stack_alloc += thread->stats.stack_alloc;
|
||||||
global_stats.stack_free += thread->stats.stack_free;
|
global_stats.stack_free += thread->stats.stack_free;
|
||||||
UNLOCK (stats_lock);
|
UNLOCK (stats_lock);
|
||||||
|
@ -569,8 +548,10 @@ LeafThread (void *_thread)
|
||||||
int thread = (int) (intptr_t) _thread;
|
int thread = (int) (intptr_t) _thread;
|
||||||
threaddata_t data;
|
threaddata_t data;
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
size_t thread_memsize = 1024 * 1024;
|
||||||
|
|
||||||
memset (&data, 0, sizeof (data));
|
memset (&data, 0, sizeof (data));
|
||||||
|
data.hunk = Hunk_Init (Sys_Alloc (thread_memsize), thread_memsize);
|
||||||
set_pool_init (&data.set_pool);
|
set_pool_init (&data.set_pool);
|
||||||
data.id = thread;
|
data.id = thread;
|
||||||
data.memsuper = new_memsuper ();
|
data.memsuper = new_memsuper ();
|
||||||
|
@ -588,13 +569,13 @@ LeafThread (void *_thread)
|
||||||
|
|
||||||
PortalFlow (&data, portal);
|
PortalFlow (&data, portal);
|
||||||
|
|
||||||
int whw = data.stats.winding_highwater;
|
int whm = data.stats.winding_mark;
|
||||||
int shw = data.stats.sep_highwater;
|
int shw = data.stats.sep_highwater;
|
||||||
int smb = data.stats.sep_maxbulk;
|
int smb = data.stats.sep_maxbulk;
|
||||||
PortalCompleted (&data, portal);
|
PortalCompleted (&data, portal);
|
||||||
data.stats.sep_highwater = shw;
|
data.stats.sep_highwater = shw;
|
||||||
data.stats.sep_maxbulk = smb;
|
data.stats.sep_maxbulk = smb;
|
||||||
data.stats.winding_highwater = whw;
|
data.stats.winding_mark = whm;
|
||||||
|
|
||||||
if (options.verbosity >= 4)
|
if (options.verbosity >= 4)
|
||||||
printf ("portal:%5i mightsee:%5i cansee:%5i %5u/%u\n",
|
printf ("portal:%5i mightsee:%5i cansee:%5i %5u/%u\n",
|
||||||
|
@ -605,8 +586,8 @@ LeafThread (void *_thread)
|
||||||
} while (1);
|
} while (1);
|
||||||
|
|
||||||
if (options.verbosity >= 2) {
|
if (options.verbosity >= 2) {
|
||||||
printf ("thread %d winding highwater: %d\n", thread,
|
printf ("thread %d winding mark: %zd\n", thread,
|
||||||
data.stats.winding_highwater);
|
data.stats.winding_mark);
|
||||||
printf ("thread %d separator highwater: %d\n", thread,
|
printf ("thread %d separator highwater: %d\n", thread,
|
||||||
data.stats.sep_highwater);
|
data.stats.sep_highwater);
|
||||||
printf ("thread %d separator maxbulk: %d\n", thread,
|
printf ("thread %d separator maxbulk: %d\n", thread,
|
||||||
|
@ -619,6 +600,8 @@ LeafThread (void *_thread)
|
||||||
working[thread] = -1;
|
working[thread] = -1;
|
||||||
delete_memsuper (data.memsuper);
|
delete_memsuper (data.memsuper);
|
||||||
dstring_delete (data.str);
|
dstring_delete (data.str);
|
||||||
|
|
||||||
|
Sys_Free (data.hunk, thread_memsize);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -965,9 +948,7 @@ CalcPortalVis (void)
|
||||||
printf ("separators allocated: %u freed: %u %u\n",
|
printf ("separators allocated: %u freed: %u %u\n",
|
||||||
global_stats.sep_alloc, global_stats.sep_free,
|
global_stats.sep_alloc, global_stats.sep_free,
|
||||||
global_stats.sep_alloc - global_stats.sep_free);
|
global_stats.sep_alloc - global_stats.sep_free);
|
||||||
printf ("windings allocated: %u freed: %u %u\n",
|
printf ("max windings mark: %zd\n", global_stats.winding_mark);
|
||||||
global_stats.winding_alloc, global_stats.winding_free,
|
|
||||||
global_stats.winding_alloc - global_stats.winding_free);
|
|
||||||
printf ("stack blocks allocated: %u freed: %u %u\n",
|
printf ("stack blocks allocated: %u freed: %u %u\n",
|
||||||
global_stats.stack_alloc, global_stats.stack_free,
|
global_stats.stack_alloc, global_stats.stack_free,
|
||||||
global_stats.stack_alloc - global_stats.stack_free);
|
global_stats.stack_alloc - global_stats.stack_free);
|
||||||
|
@ -1283,8 +1264,6 @@ LoadPortals (char *name)
|
||||||
line = err;
|
line = err;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (numpoints > MAX_POINTS_ON_WINDING)
|
|
||||||
Sys_Error ("LoadPortals: portal %u has too many points", i);
|
|
||||||
if ((unsigned) clusternums[0] > (unsigned) portalclusters
|
if ((unsigned) clusternums[0] > (unsigned) portalclusters
|
||||||
|| (unsigned) clusternums[1] > (unsigned) portalclusters)
|
|| (unsigned) clusternums[1] > (unsigned) portalclusters)
|
||||||
Sys_Error ("LoadPortals: reading portal %u", i);
|
Sys_Error ("LoadPortals: reading portal %u", i);
|
||||||
|
@ -1413,8 +1392,10 @@ main (int argc, char **argv)
|
||||||
{
|
{
|
||||||
double start, stop;
|
double start, stop;
|
||||||
QFile *f;
|
QFile *f;
|
||||||
|
size_t main_memsize = 1024 * 1024 * 1024;
|
||||||
|
|
||||||
main_thread.memsuper = new_memsuper ();
|
main_thread.memsuper = new_memsuper ();
|
||||||
|
main_thread.hunk = Hunk_Init (Sys_Alloc (main_memsize), main_memsize);
|
||||||
|
|
||||||
start = Sys_DoubleTime ();
|
start = Sys_DoubleTime ();
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue