[zone] Squeeze cache_system_t to fit into 64 bytes

The cache system pointers are now indices into an array of
cache_system_t blocks, allowing them to be 32 bits instead of 64, thus
allowing cache_system_t to fit into a single CPU cache line. This still
gives an effective 38 bits (256GB) of addressing for cache/hunk. This
does mean that the cache functions cannot work with more than 256GB, but
should that become a problem, cache and working hunk can be
separate, and it should be possible to have multiple cache systems.
This commit is contained in:
Bill Currie 2021-12-13 11:23:25 +09:00
parent d857bdea77
commit ff40563fc0

View file

@ -77,8 +77,7 @@ typedef struct memblock_s {
size_t size; // requested size size_t size; // requested size
int tag; // a tag of 0 is a free block int tag; // a tag of 0 is a free block
int id; // should be ZONEID int id; // should be ZONEID
//int id2; // pad to 64 bit boundary } __attribute__((aligned (64))) memblock_t;
} memblock_t;
struct memzone_s { struct memzone_s {
size_t size; // total bytes malloced, including header size_t size; // total bytes malloced, including header
@ -87,10 +86,9 @@ struct memzone_s {
size_t ele_size; size_t ele_size;
void (*error) (void *, const char *); void (*error) (void *, const char *);
void *data; void *data;
memblock_t blocklist; // start / end cap for linked list
memblock_t *rover; memblock_t *rover;
}; memblock_t blocklist; // start / end cap for linked list
} __attribute__((aligned (64)));
static int static int
z_block_size (memblock_t *block) z_block_size (memblock_t *block)
@ -411,14 +409,16 @@ Z_CheckPointer (const memzone_t *zone, const void *ptr, size_t size)
typedef struct cache_system_s cache_system_t; typedef struct cache_system_s cache_system_t;
struct cache_system_s { struct cache_system_s {
cache_system_t *prev, *next; uint32_t prev;
cache_system_t *lru_prev, *lru_next; // for LRU flushing uint32_t next;
uint32_t lru_prev;
uint32_t lru_next;
struct memhunk_s *hunk; struct memhunk_s *hunk;
char name[16];
size_t size; // including this header size_t size; // including this header
int readlock;
cache_user_t *user; cache_user_t *user;
} __attribute__((aligned (64)));//FIXME base 64-bit size is 80, so 128... char name[16];
int readlock;
} __attribute__((aligned (64)));
typedef struct { typedef struct {
int sentinal1; int sentinal1;
@ -434,9 +434,21 @@ struct memhunk_s {
size_t high_used; size_t high_used;
size_t tempmark; size_t tempmark;
qboolean tempactive; qboolean tempactive;
cache_system_t cache_head; cache_system_t cache_head[1];
} __attribute__((aligned (64))); } __attribute__((aligned (64)));
/*
	cs_ptr

	Convert a cache block index into a pointer. The index counts
	cache_system_t-sized elements starting at hunk->cache_head, so
	index 0 yields the address of cache_head itself.
*/
static cache_system_t *
cs_ptr (memhunk_t *hunk, uint32_t cs_ind)
{
	return hunk->cache_head + cs_ind;
}
/*
	cs_ind

	Convert a cache block pointer into an index: the distance, in
	cache_system_t-sized elements, from hunk->cache_head to the block.
	Passing the address of cache_head itself yields 0.
*/
static uint32_t
cs_ind (memhunk_t *hunk, cache_system_t *cs_ptr)
{
	// the element distance is narrowed to the 32-bit link width
	return (uint32_t) (cs_ptr - &hunk->cache_head[0]);
}
static memhunk_t *global_hunk; static memhunk_t *global_hunk;
/* /*
@ -799,8 +811,8 @@ Cache_FreeLow (memhunk_t *hunk, int new_low_hunk)
cache_system_t *c; cache_system_t *c;
while (1) { while (1) {
c = hunk->cache_head.prev; c = cs_ptr (hunk, hunk->cache_head[0].prev);
if (c == &hunk->cache_head) if (c == hunk->cache_head)
return; // nothing in cache at all return; // nothing in cache at all
if ((byte *) c >= hunk->base + new_low_hunk) if ((byte *) c >= hunk->base + new_low_hunk)
return; // there is space to grow the hunk return; // there is space to grow the hunk
@ -812,27 +824,22 @@ Cache_FreeLow (memhunk_t *hunk, int new_low_hunk)
static inline void static inline void
Cache_UnlinkLRU (cache_system_t * cs) Cache_UnlinkLRU (cache_system_t * cs)
{ {
if (!cs->lru_next || !cs->lru_prev) memhunk_t *hunk = cs->hunk;
Sys_Error ("Cache_UnlinkLRU: NULL link: %.16s %p %p", cs_ptr (hunk, cs->lru_next)->lru_prev = cs->lru_prev;
cs->name, cs->lru_next, cs->lru_prev); cs_ptr (hunk, cs->lru_prev)->lru_next = cs->lru_next;
cs->lru_next->lru_prev = cs->lru_prev; cs->lru_prev = cs->lru_next = 0;
cs->lru_prev->lru_next = cs->lru_next;
cs->lru_prev = cs->lru_next = NULL;
} }
static void static void
Cache_MakeLRU (cache_system_t * cs) Cache_MakeLRU (cache_system_t * cs)
{ {
if (cs->lru_next || cs->lru_prev) memhunk_t *hunk = cs->hunk;
Sys_Error ("Cache_MakeLRU: active link: %.16s %p %p", __auto_type nx = cs_ptr (hunk, hunk->cache_head[0].lru_next);
cs->name, cs->lru_next, cs->lru_prev); nx->lru_prev = cs_ind (hunk, cs);
cs->lru_next = cs_ind (hunk, nx);
cs->hunk->cache_head.lru_next->lru_prev = cs; cs->lru_prev = 0;
cs->lru_next = cs->hunk->cache_head.lru_next; hunk->cache_head[0].lru_next = cs_ind (hunk, cs);
cs->lru_prev = &cs->hunk->cache_head;
cs->hunk->cache_head.lru_next = cs;
} }
static qboolean static qboolean
@ -841,10 +848,11 @@ Cache_FreeLRU (memhunk_t *hunk)
cache_system_t *cs; cache_system_t *cs;
//check_cache (); //check_cache ();
for (cs = hunk->cache_head.lru_prev; for (cs = cs_ptr (hunk, hunk->cache_head[0].lru_prev);
cs != &hunk->cache_head && cs->readlock; cs = cs->lru_prev) cs != hunk->cache_head && cs->readlock;
; cs = cs_ptr (hunk, cs->lru_prev)) {
if (cs == &hunk->cache_head) }
if (cs == hunk->cache_head)
return 0; return 0;
Cache_Free (cs->user); Cache_Free (cs->user);
return 1; return 1;
@ -853,10 +861,11 @@ Cache_FreeLRU (memhunk_t *hunk)
static void static void
link_cache_system (cache_system_t *new, cache_system_t *cs) link_cache_system (cache_system_t *new, cache_system_t *cs)
{ {
new->next = cs; memhunk_t *hunk = cs->hunk;
new->next = cs_ind (hunk, cs);
new->prev = cs->prev; new->prev = cs->prev;
cs->prev->next = new; cs_ptr (hunk, cs->prev)->next = cs_ind (hunk, new);
cs->prev = new; cs->prev = cs_ind (hunk, new);
} }
@ -873,22 +882,25 @@ Cache_TryAlloc (memhunk_t *hunk, size_t size, qboolean nobottom)
//check_cache (); //check_cache ();
// is the cache completely empty? // is the cache completely empty?
if (!nobottom && hunk->cache_head.prev == &hunk->cache_head) { if (!nobottom && hunk->cache_head[0].prev == 0) {
new = (cache_system_t *) Hunk_HighAlloc (hunk, size); new = (cache_system_t *) Hunk_HighAlloc (hunk, size);
if (!new) if (!new)
return 0; return 0;
memset (new, 0, size); memset (new, 0, size);
new->size = size; new->size = size;
new->hunk = hunk; new->hunk = hunk;
hunk->cache_head.prev = hunk->cache_head.next = new; hunk->cache_head[0].prev = cs_ind (hunk, new);
new->prev = new->next = &hunk->cache_head; hunk->cache_head[0].next = cs_ind (hunk, new);
new->prev = new->next = 0;
Cache_MakeLRU (new); Cache_MakeLRU (new);
//check_cache (); //check_cache ();
return new; return new;
} }
// search for space in existing cache // search for space in existing cache
for (cs = hunk->cache_head.next; cs != &hunk->cache_head; cs = cs->next) { for (cs = cs_ptr (hunk, hunk->cache_head[0].next);
cs != hunk->cache_head;
cs = cs_ptr (hunk, cs->next)) {
if (cs->user) if (cs->user)
continue; // block isn't free continue; // block isn't free
if (cs->size >= size) { if (cs->size >= size) {
@ -919,7 +931,7 @@ Cache_TryAlloc (memhunk_t *hunk, size_t size, qboolean nobottom)
memset (new, 0, size); memset (new, 0, size);
new->size = size; new->size = size;
new->hunk = hunk; new->hunk = hunk;
link_cache_system (new, &hunk->cache_head); link_cache_system (new, hunk->cache_head);
Cache_MakeLRU (new); Cache_MakeLRU (new);
//check_cache (); //check_cache ();
return new; return new;
@ -931,20 +943,20 @@ Cache_TryAlloc (memhunk_t *hunk, size_t size, qboolean nobottom)
static void static void
Cache_Profile_r (memhunk_t *hunk) Cache_Profile_r (memhunk_t *hunk)
{ {
cache_system_t *cs;
unsigned int i; unsigned int i;
unsigned int items[31] = {0}, sizes[31] = {0}; unsigned int items[31] = {0}, sizes[31] = {0};
int count = 0, total = 0; int count = 0, total = 0;
cache_system_t *cs;
cs = hunk->cache_head.next; for (uint32_t ind = hunk->cache_head[0].next; ind; ind = cs->next) {
while (cs != &hunk->cache_head) { cs = cs_ptr (hunk, ind);
for (i = 0; (cs->size >> (i + 1)) && i < 30; i++) for (i = 0; (cs->size >> (i + 1)) && i < 30; i++) {
; }
items[i]++; items[i]++;
sizes[i] += cs->size; sizes[i] += cs->size;
total += cs->size; total += cs->size;
count++; count++;
cs = cs->next; ind = cs->next;
} }
Sys_Printf ("Cache Profile:\n"); Sys_Printf ("Cache Profile:\n");
Sys_Printf ("%8s %8s %8s %8s %8s\n", Sys_Printf ("%8s %8s %8s %8s %8s\n",
@ -971,10 +983,10 @@ Cache_Profile (void)
static void static void
Cache_Print_r (memhunk_t *hunk) Cache_Print_r (memhunk_t *hunk)
{ {
cache_system_t *cd; cache_system_t *cs;
for (uint32_t ind = hunk->cache_head[0].next; ind; ind = cs->next) {
for (cd = hunk->cache_head.next; cd != &hunk->cache_head; cd = cd->next) { cs = cs_ptr (hunk, ind);
Sys_Printf ("%8d : %.16s\n", (int) cd->size, cd->name); Sys_Printf ("%8d : %.16s\n", (int) cs->size, cs->name);
} }
} }
@ -987,10 +999,12 @@ Cache_Print (void)
static void static void
init_cache (memhunk_t *hunk) init_cache (memhunk_t *hunk)
{ {
hunk->cache_head.next = hunk->cache_head.prev = &hunk->cache_head; hunk->cache_head[0].hunk = hunk;
hunk->cache_head.lru_next = hunk->cache_head.lru_prev = &hunk->cache_head; hunk->cache_head[0].size = 0;
hunk->cache_head.user = (cache_user_t *) 1; // make it look allocated hunk->cache_head[0].next = hunk->cache_head[0].prev = 0;
hunk->cache_head.readlock = 1; // don't try to free or move it hunk->cache_head[0].lru_next = hunk->cache_head[0].lru_prev = 0;
hunk->cache_head[0].user = (cache_user_t *) 1; // make it look allocated
hunk->cache_head[0].readlock = 1; // don't try to free or move it
} }
static void static void
@ -1014,13 +1028,13 @@ Cache_Flush_r (memhunk_t *hunk)
{ {
// cache_head.prev is guaranteed to not be free because it's the bottom // cache_head.prev is guaranteed to not be free because it's the bottom
// one and Cache_Free actually properly releases it // one and Cache_Free actually properly releases it
while (hunk->cache_head.prev != &hunk->cache_head) { while (hunk->cache_head[0].prev) {
if (!hunk->cache_head.prev->user->data) __auto_type cs = cs_ptr (hunk, hunk->cache_head[0].prev);
if (!cs->user->data)
Sys_Error ("Cache_Flush: user/system out of sync for " Sys_Error ("Cache_Flush: user/system out of sync for "
"'%.16s' with %d size", "'%.16s' with %zd size",
hunk->cache_head.prev->name, cs->name, cs->size);
(int) hunk->cache_head.prev->size); Cache_Free (cs->user); // reclaim the space
Cache_Free (hunk->cache_head.prev->user); // reclaim the space
} }
} }
@ -1062,7 +1076,6 @@ Cache_Free (cache_user_t *c)
Sys_Error ("Cache_Free: not allocated"); Sys_Error ("Cache_Free: not allocated");
cs = ((cache_system_t *) c->data) - 1; cs = ((cache_system_t *) c->data) - 1;
memhunk_t *hunk = cs->hunk;
if (cs->readlock) if (cs->readlock)
Sys_Error ("Cache_Free: attempt to free locked block"); Sys_Error ("Cache_Free: attempt to free locked block");
@ -1071,27 +1084,29 @@ Cache_Free (cache_user_t *c)
Cache_UnlinkLRU (cs); Cache_UnlinkLRU (cs);
memhunk_t *h = cs->hunk;
//check_cache (); //check_cache ();
cs->user = 0; cs->user = 0;
if (!cs->prev->user) { if (!cs_ptr (h, cs->prev)->user) {
cs->size += cs->prev->size; cs->size += cs_ptr (h, cs->prev)->size;
cs->prev->prev->next = cs; cs_ptr (h, cs_ptr (h, cs->prev)->prev)->next = cs_ind (h, cs);
cs->prev = cs->prev->prev; cs->prev = cs_ptr (h, cs->prev)->prev;
} }
if (!cs->next->user) { if (!cs_ptr (h, cs->next)->user) {
cs = cs->next; cs = cs_ptr (h, cs->next);
cs->size += cs->prev->size; cs->size += cs_ptr (h, cs->prev)->size;
cs->prev->prev->next = cs; cs_ptr (h, cs_ptr (h, cs->prev)->prev)->next = cs_ind (h, cs);
cs->prev = cs->prev->prev; cs->prev = cs_ptr (h, cs->prev)->prev;
}
if (!cs->next) {
cs_ptr (h, cs->next)->prev = cs->prev;
cs_ptr (h, cs->prev)->next = cs->next;
if (cs->prev) {
__auto_type ptr = (byte *) cs_ptr (h, cs->prev);
Hunk_FreeToHighMark (h, h->size - (ptr - h->base));
} else {
Hunk_FreeToHighMark (h, 0);
} }
if (cs->next == &hunk->cache_head) {
cs->next->prev = cs->prev;
cs->prev->next = cs->next;
if (cs->prev != &hunk->cache_head)
Hunk_FreeToHighMark (hunk,
hunk->size - ((byte*)cs->prev - hunk->base));
else
Hunk_FreeToHighMark (hunk, 0);
} }
//check_cache (); //check_cache ();