mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2025-01-20 07:50:45 +00:00
af814ff9a8
This was inspired by Hoard: A Scalable Memory Allocator for Multithreaded Applications (Emery D. Berger, Kathryn S. McKinley, Robert D. Blumofe, Paul R. Wilson). It's not anywhere near the same implementation, but it did take a few basic concepts. The idea is twofold: 1) A pool of memory from which blocks can be allocated and then freed en masse, and which is fairly efficient for small (4-16 byte) blocks. 2) Thread safety for use with the Vulkan renderer (and any other multi-threaded tasks). However, based on the Hoard paper, small allocations are cache-line aligned. On top of that, larger allocations are page aligned. I suspect it would help qfvis somewhat if I ever get around to tweaking qfvis to use cmem.
327 lines
8.4 KiB
C
327 lines
8.4 KiB
C
/*
|
|
cmem.c
|
|
|
|
Cache-line aligned memory allocator
|
|
|
|
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; either version 2
|
|
of the License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to:
|
|
|
|
Free Software Foundation, Inc.
|
|
59 Temple Place - Suite 330
|
|
Boston, MA 02111-1307, USA
|
|
|
|
*/
|
|
#include <unistd.h>
|
|
|
|
#include "QF/alloc.h"
|
|
#include "QF/cmem.h"
|
|
|
|
|
|
memsuper_t *
|
|
new_memsuper (void)
|
|
{
|
|
memsuper_t *super = aligned_alloc (MEM_LINE_SIZE, sizeof (*super));
|
|
memset (super, 0, sizeof (*super));
|
|
super->page_size = sysconf (_SC_PAGESIZE);
|
|
super->page_mask = (super->page_size - 1);
|
|
return super;
|
|
}
|
|
|
|
void
|
|
delete_memsuper (memsuper_t *super)
|
|
{
|
|
while (super->memblocks) {
|
|
memblock_t *t = super->memblocks;
|
|
super->memblocks = super->memblocks->next;
|
|
free (t->mem);
|
|
}
|
|
free (super);
|
|
}
|
|
|
|
static memblock_t *
|
|
init_block (memsuper_t *super, void *mem, size_t alloc_size)
|
|
{
|
|
size_t size = super->page_size;
|
|
size_t mask = super->page_mask;
|
|
size_t ptr = (size_t) mem;
|
|
memblock_t *block;
|
|
|
|
block = (memblock_t *) (((ptr + size) & ~mask) - sizeof (memblock_t));
|
|
memset (block, 0, sizeof (memblock_t));
|
|
|
|
if (super->memblocks) {
|
|
super->memblocks->prev = &block->next;
|
|
}
|
|
block->next = super->memblocks;
|
|
block->prev = &super->memblocks;
|
|
super->memblocks = block;
|
|
|
|
block->mem = mem;
|
|
block->pre_size = (size_t) block - (size_t) mem;
|
|
block->post_size = alloc_size - block->pre_size - sizeof (memblock_t);
|
|
if (!((size_t) mem & mask) && block->pre_size) {
|
|
// can't use the first cache line of the page as it would be
|
|
// indistinguishable from a large block
|
|
block->pre_size -= MEM_LINE_SIZE;
|
|
}
|
|
if (block->pre_size) {
|
|
block->free_lines = (memline_t *) ((size_t) block - block->pre_size);
|
|
block->free_lines->next = 0;
|
|
block->free_lines->size = block->pre_size;
|
|
}
|
|
return block;
|
|
}
|
|
|
|
static memblock_t *
|
|
block_alloc (memsuper_t *super, size_t size)
|
|
{
|
|
memblock_t *block;
|
|
memblock_t *best = 0;
|
|
size_t best_size = ~0u;
|
|
|
|
for (block = super->memblocks; block; block = block->next) {
|
|
if (block->post_free && block->post_size >= size
|
|
&& block->post_size < best_size) {
|
|
best = block;
|
|
best_size = block->post_size;
|
|
}
|
|
}
|
|
if (best) {
|
|
best->post_free = 0;
|
|
return best;
|
|
}
|
|
|
|
size_t page_size = super->page_size;
|
|
size_t alloc_size = sizeof (memblock_t) + page_size + size;
|
|
void *mem = aligned_alloc (MEM_LINE_SIZE, alloc_size);
|
|
block = init_block (super, mem, alloc_size);
|
|
return block;
|
|
}
|
|
|
|
static void *
|
|
line_alloc (memblock_t *block, size_t size)
|
|
{
|
|
memline_t **line = &block->free_lines;
|
|
memline_t **best = 0;
|
|
memline_t *mem;
|
|
size_t best_size = ~0u;
|
|
|
|
while (*line) {
|
|
if ((*line)->size >= size && (*line)->size < best_size) {
|
|
best_size = (*line)->size;
|
|
best = line;
|
|
}
|
|
line = &(*line)->next;
|
|
}
|
|
if (!best) {
|
|
return 0;
|
|
}
|
|
mem = *best;
|
|
if (size < best_size) {
|
|
*best = (memline_t *)((size_t) mem + size);
|
|
(*best)->next = mem->next;
|
|
(*best)->size = mem->size - size;
|
|
} else {
|
|
*best = (*best)->next;
|
|
}
|
|
block->pre_allocated += size;
|
|
return mem;
|
|
}
|
|
|
|
static void
|
|
line_free (memblock_t *block, void *mem)
|
|
{
|
|
//FIXME right now, can free only single lines (need allocated lines to
|
|
// have a control block)
|
|
size_t size = MEM_LINE_SIZE;
|
|
memline_t **l;
|
|
memline_t *line = 0;
|
|
|
|
block->pre_allocated -= size;
|
|
|
|
for (l = &block->free_lines; *l; l = &(*l)->next) {
|
|
line = *l;
|
|
|
|
if ((size_t) mem + size < (size_t) line) {
|
|
// line to be freed is below the free line
|
|
break;
|
|
}
|
|
if ((size_t) mem + size == (size_t) line) {
|
|
// line to be freed is immediately below the free line
|
|
// merge with the free line
|
|
size += line->size;
|
|
line = line->next;
|
|
break;
|
|
}
|
|
if ((size_t) line + line->size == (size_t) mem) {
|
|
// line to be freed is immediately above the free line
|
|
// merge with the free line
|
|
line->size += size;
|
|
if (line->next && (size_t) line->next == (size_t) mem + size) {
|
|
line->size += line->next->size;
|
|
line->next = line->next->next;
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
((memline_t *) mem)->next = line;
|
|
((memline_t *) mem)->size = size;
|
|
*l = mem;
|
|
}
|
|
|
|
static memsline_t *
|
|
sline_new (memsuper_t *super, size_t size_ind)
|
|
{
|
|
size_t size = 4 << size_ind;
|
|
size_t free_loc = (sizeof (memsline_t) + size - 1) & ~(size - 1);
|
|
memsline_t *sline = cmemalloc (super, MEM_LINE_SIZE);
|
|
sline->size = size_ind;
|
|
sline->list = free_loc >> 2;
|
|
while (free_loc + size < MEM_LINE_SIZE) {
|
|
*(uint16_t *)((size_t) sline + free_loc) = free_loc + size;
|
|
free_loc += size;
|
|
}
|
|
*(uint16_t *)((size_t) sline + free_loc) = 0;
|
|
if (super->last_freed[size_ind]) {
|
|
super->last_freed[size_ind]->prev = (size_t) &sline->next >> 6;
|
|
}
|
|
sline->next = super->last_freed[size_ind];
|
|
sline->prev = (size_t) &super->last_freed[size_ind] >> 6;
|
|
super->last_freed[size_ind] = sline;
|
|
return sline;
|
|
}
|
|
|
|
void *
|
|
cmemalloc (memsuper_t *super, size_t size)
|
|
{
|
|
size_t ind = 0;
|
|
// allocation sizes start at 4 (sizeof(float)) and go up in powers of two
|
|
while ((4u << ind) < size) {
|
|
ind++;
|
|
}
|
|
// round size up
|
|
if (size > MEM_LINE_SIZE * 8 || size > super->page_size / 8) {
|
|
// the object is large enough it could cause excessive fragmentation,
|
|
memblock_t *block = block_alloc (super, 4 << ind);
|
|
if (!block) {
|
|
return 0;
|
|
}
|
|
return block + 1;
|
|
} else {
|
|
size = 4 << ind;
|
|
if (size >= MEM_LINE_SIZE) {
|
|
// whole cache lines are required for this object
|
|
// FIXME slow
|
|
memblock_t *block = super->memblocks;
|
|
void *mem;
|
|
|
|
while (block) {
|
|
if ((mem = line_alloc (block, size))) {
|
|
return mem;
|
|
}
|
|
block = block->next;
|
|
}
|
|
/* The cache-line pool is page aligned for two reasons:
|
|
* 1) so it fits exactly within a page
|
|
* 2) the control block can be found easily
|
|
* And the reason the pool is exactly one page large is so no
|
|
* allocated line is ever page-aligned as that would make the line
|
|
* indistinguishable from a large block.
|
|
*/
|
|
mem = aligned_alloc (super->page_size, super->page_size);
|
|
block = init_block (super, mem, super->page_size);
|
|
return line_alloc (block, size);
|
|
} else {
|
|
void *mem = 0;
|
|
memsline_t **sline = &super->last_freed[ind];
|
|
if (!*sline) {
|
|
*sline = sline_new (super, ind);
|
|
}
|
|
if (*sline) {
|
|
size_t list = (*sline)->list << 2;
|
|
mem = (void *) ((size_t) *sline + list);
|
|
(*sline)->list = *(uint16_t *) mem >> 2;
|
|
if (!(*sline)->list) {
|
|
// the sub-line is full, so remove it from the free
|
|
// list. Freeing a block from the line will add it back
|
|
// to the list
|
|
memsline_t *s = *sline;
|
|
if ((*sline)->next) {
|
|
(*sline)->next->prev = (*sline)->prev;
|
|
}
|
|
*sline = (*sline)->next;
|
|
s->next = 0;
|
|
s->prev = 0;
|
|
}
|
|
}
|
|
return mem;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
unlink_block (memblock_t *block)
|
|
{
|
|
if (block->next) {
|
|
block->next->prev = block->prev;
|
|
}
|
|
*block->prev = block->next;
|
|
}
|
|
|
|
void
|
|
cmemfree (memsuper_t *super, void *mem)
|
|
{
|
|
memsline_t **super_sline;
|
|
memsline_t *sline;
|
|
memblock_t *block;
|
|
|
|
if ((size_t) mem & (MEM_LINE_SIZE - 1)) {
|
|
// sub line block
|
|
sline = (memsline_t *) ((size_t) mem & ~(MEM_LINE_SIZE - 1));
|
|
*(uint16_t *) mem = sline->list << 2;
|
|
sline->list = (size_t) mem & (MEM_LINE_SIZE - 1);
|
|
super_sline = &super->last_freed[sline->size];
|
|
if (*super_sline != sline) {
|
|
if (sline->next) {
|
|
sline->next->prev = sline->prev;
|
|
}
|
|
if (sline->prev) {
|
|
*(memsline_t **) (size_t)(sline->prev << 6) = sline->next;
|
|
}
|
|
|
|
(*super_sline)->prev = (size_t) &sline->next >> 6;
|
|
sline->next = *super_sline;
|
|
sline->prev = (size_t) super_sline >> 6;
|
|
(*super_sline) = sline;
|
|
}
|
|
return;
|
|
} else if ((size_t) mem & super->page_mask) {
|
|
// cache line
|
|
size_t page_size = super->page_size;
|
|
size_t page_mask = super->page_mask;
|
|
block = (memblock_t *) (((size_t) mem + page_size) & ~page_mask) - 1;
|
|
line_free (block, mem);
|
|
} else {
|
|
// large block
|
|
block = (memblock_t *) mem - 1;
|
|
block->post_free = 1;
|
|
}
|
|
if (!block->pre_allocated && (!block->post_size || block->post_free)) {
|
|
unlink_block (block);
|
|
free (block->mem);
|
|
}
|
|
}
|