quakeforge/libs/util/cmem.c
Bill Currie 0a847f92f1 [util] Use mmap/munmap for cmem internal alloc/free
This reduces the overhead needed to manage the memory blocks as the
blocks are guaranteed to be page-aligned. Also, the superblock is now
allocated from within one of the memory blocks it manages. While this
does slightly reduce the available cachelines within the first block (by
one or two depending on 32 vs 64 bit pointers), it removes the need for
an extra memory allocation (probably via malloc) for the superblock.
2021-07-12 16:33:47 +09:00

424 lines
11 KiB
C

/*
cmem.c
Cache-line aligned memory allocator
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "QF/alloc.h"
#include "QF/cmem.h"
#include "QF/sys.h"
/*
    Integer base-2 logarithm, rounded down.

    Returns the index of the highest set bit of x. Both 0 and 1 yield 0.
*/
static size_t __attribute__((const))
ilog2 (size_t x)
{
    size_t      bits;

    // count how many times x can be halved before it drops to 1 (or 0)
    for (bits = 0; x > 1; x >>= 1) {
        bits++;
    }
    return bits;
}
/*
    Push a free line onto the head of the superblock's free-line chain for
    its size class.

    The size class is ilog2 (line->size) - 6 (the 6 is presumably log2 of
    the 64-byte cache line size -- TODO confirm against MEM_LINE_SIZE).
    free_prev always holds the address of the pointer that refers to the
    line, so a line can later be unlinked without knowing which chain it
    is on.
*/
static void
link_free_line (memsuper_t *super, memline_t *line)
{
    memline_t **head = &super->free_lines[ilog2 (line->size) - 6];
    memline_t  *first = *head;

    line->free_prev = head;
    line->free_next = first;
    if (first) {
        // the old head is now referenced through the new line
        first->free_prev = &line->free_next;
    }
    *head = line;
}
/*
    Remove a line from whichever size-class free chain it is on.

    Only the neighbours are patched; the line's own free_next and free_prev
    fields are left as they were.
*/
static void
unlink_free_line (memline_t *line)
{
    memline_t  *succ = line->free_next;
    memline_t **back = line->free_prev;

    // whatever pointed at this line now points at its successor
    *back = succ;
    if (succ) {
        succ->free_prev = back;
    }
}
/*
    Remove a line from both its block's free-line chain and its size-class
    free chain.

    The line's own block_next field is not modified, so the caller can still
    follow it after the line has been unlinked.
*/
static void
unlink_line (memline_t *line)
{
    memline_t  *succ = line->block_next;
    memline_t **back = line->block_prev;

    if (succ) {
        succ->block_prev = back;
    }
    *back = succ;

    unlink_free_line (line);
}
/*
    Set up freshly obtained memory as a memory block and register it with
    the superblock.

    super       superblock that will own the block
    mem         start of the memory; the block header is placed here
    alloc_size  total number of bytes obtained for the block

    The block header is zeroed and pushed on the head of super->memblocks.
    Everything in the first page after the header becomes one free line,
    and any bytes beyond the first page are recorded as post_size.

    Returns the initialized block.
*/
static memblock_t *
init_block (memsuper_t *super, void *mem, size_t alloc_size)
{
    memblock_t *block = mem;
    memblock_t *old_head = super->memblocks;
    // the first (and for now only) free line starts right after the header
    memline_t  *line = (memline_t *) (block + 1);

    memset (block, 0, sizeof (*block));
    block->size = super->page_size - sizeof (*block);
    block->post_size = alloc_size - super->page_size;

    // push the block on the head of the superblock's block list
    block->prev = &super->memblocks;
    block->next = old_head;
    if (old_head) {
        old_head->prev = &block->next;
    }
    super->memblocks = block;

    // a single free line covering the rest of the first page
    line->block = block;
    line->size = block->size;
    line->block_next = 0;
    line->block_prev = &block->free_lines;
    block->free_lines = line;
    link_free_line (super, line);

    return block;
}
/*
    Get a block whose post-page area can hold at least size bytes.

    super   superblock to allocate from
    size    number of bytes required after the block's first page

    Existing blocks whose post area is marked free are searched first and
    the smallest one that fits is reused (its post_free flag is cleared).
    Otherwise a new block of page_size + size bytes is obtained from
    Sys_Alloc and initialized.

    Returns the block, or 0 if the request cannot be satisfied.
*/
static memblock_t *
block_alloc (memsuper_t *super, size_t size)
{
    memblock_t *block;
    memblock_t *best = 0;
    // Largest size_t value. ~0u is only UINT_MAX, which on targets where
    // size_t is wider than unsigned would stop free post areas of 4GiB or
    // more from ever being selected.
    size_t      best_size = ~(size_t) 0;

    for (block = super->memblocks; block; block = block->next) {
        if (block->post_free && block->post_size >= size
            && block->post_size < best_size) {
            best = block;
            best_size = block->post_size;
        }
    }
    if (best) {
        best->post_free = 0;
        return best;
    }

    size_t      page_size = super->page_size;
    if (size > ~(size_t) 0 - page_size) {
        // page_size + size would wrap around
        return 0;
    }
    size_t      alloc_size = page_size + size;
    void       *mem = Sys_Alloc (alloc_size);
    // NOTE(review): assumes Sys_Alloc reports failure with a null pointer;
    // confirm against its implementation.
    if (!mem) {
        return 0;
    }
    return init_block (super, mem, alloc_size);
}
/*
    Allocate size bytes from the front of a free line.

    line    free line to allocate from
    size    number of bytes to take (callers in this file pass a value no
            larger than line->size)

    If the line is larger than requested, the remainder becomes a new free
    line that takes the position directly after the original in both the
    block chain and the size-class free chain. The (possibly shortened)
    line is then counted as allocated in its block and unlinked from both
    chains.

    NOTE(review): the remainder stays in the chain of the original line's
    size class even though it is now smaller -- confirm this is intended.

    Returns the address of the allocated memory (the start of line).
*/
static void *
alloc_line (memline_t *line, size_t size)
{
    if (line->size > size) {
        memline_t  *rest = (memline_t *) ((char *) line + size);
        memline_t  *block_succ = line->block_next;
        memline_t  *free_succ = line->free_next;

        rest->block = line->block;
        rest->size = line->size - size;
        line->size = size;

        // block chain: line -> rest -> block_succ
        rest->block_prev = &line->block_next;
        rest->block_next = block_succ;
        if (block_succ) {
            block_succ->block_prev = &rest->block_next;
        }
        line->block_next = rest;

        // free chain: line -> rest -> free_succ
        rest->free_prev = &line->free_next;
        rest->free_next = free_succ;
        if (free_succ) {
            free_succ->free_prev = &rest->free_next;
        }
        line->free_next = rest;
    }

    line->block->allocated += line->size;
    unlink_line (line);
    return line;
}
/*
Return one cache line to its block's free-line list.

super   superblock whose size-class free chains are updated
block   block that contains mem
mem     address of the line being freed

The block's free lines are walked in list order (the checks below expect
ascending addresses). The freed line is merged with a free line directly
above and/or below it when they are adjacent, otherwise it is inserted as
a new free line at the position where the walk stopped.
*/
static void
line_free (memsuper_t *super, memblock_t *block, void *mem)
{
//FIXME right now, can free only single lines (need allocated lines to
// have a control block)
size_t size = MEM_LINE_SIZE;
// l always addresses the pointer that refers to the current line, which is
// also the pointer to patch when inserting the freed line
memline_t **l;
memline_t *line = 0;
block->allocated -= size;
for (l = &block->free_lines; *l; l = &(*l)->block_next) {
line = *l;
if (line->block_next && line->block_next < line) {
// sanity check: the next free line is at a lower address than this
// one; the null write is apparently a deliberate crash
*(int *)0 = 0;
}
if ((size_t) mem + size < (size_t) line) {
// line to be freed is below the free line
break;
}
if ((size_t) mem + size == (size_t) line) {
// line to be freed is immediately below the free line
// merge with the free line by "allocating" the line and then
// "freeing" it with the line to be freed
size += line->size;
unlink_line (line); // does not modify line->block_next
// unlinking made *l refer to the following line, so the merged line
// is inserted at l in front of it
line = line->block_next;
break;
}
if ((size_t) line + line->size == (size_t) mem) {
// line to be freed is immediately above the free line
// merge with the free line by growing the line
line->size += size;
if (line->block_next
&& (size_t) line->block_next == (size_t) mem + size) {
// the line to be freed connects two free lines
line->size += line->block_next->size;
unlink_line (line->block_next);
}
// the line changed size so needs to be relinked in the super
unlink_free_line (line);
link_free_line (super, line);
return;
}
if ((size_t) mem >= (size_t) line
&& (size_t) mem < (size_t) line + line->size) {
// mem lies inside a line that is already free; the null write is
// apparently a deliberate crash
*(int *) 0 = 0;
}
// if the walk runs off the end of the list, the freed line has no
// successor
line = 0;
}
// insert the freed (and possibly merged) line at l, in front of line
memline_t *memline = (memline_t *) mem;
memline->block_next = line;
if (memline->block_next) {
memline->block_next->block_prev = &memline->block_next;
}
memline->block_prev = l;
memline->size = size;
memline->block = block;
*l = memline;
link_free_line (super, memline);
}
/*
    Create a new sub-line: a single cache line carved into equal slots of
    (4 << size_ind) bytes each.

    super       superblock to allocate the cache line from
    size_ind    slot size index; slots are 4 << size_ind bytes

    The sub-line header sits at the start of the cache line. The free slots
    form a list threaded through the slots themselves: the first 16 bits of
    each free slot hold the byte offset (from the start of the sub-line) of
    the next free slot, with 0 terminating the list. The new sub-line is
    pushed on the head of super->last_freed[size_ind].

    Returns the new sub-line, or 0 if no cache line could be allocated.
*/
static memsline_t *
sline_new (memsuper_t *super, size_t size_ind)
{
    size_t      size = 4 << size_ind;
    // offset of the first slot: the header size rounded up to a multiple of
    // the slot size
    size_t      free_loc = (sizeof (memsline_t) + size - 1) & ~(size - 1);
    memsline_t *sline = cmemalloc (super, MEM_LINE_SIZE);

    if (!sline) {
        // cmemalloc can return 0; do not write through a null pointer
        return 0;
    }
    sline->size = size_ind;
    // the list head is stored in units of 4 bytes
    sline->list = free_loc >> 2;
    // chain each slot to the one following it
    while (free_loc + size < MEM_LINE_SIZE) {
        *(uint16_t *)((size_t) sline + free_loc) = free_loc + size;
        free_loc += size;
    }
    // the last slot terminates the list
    *(uint16_t *)((size_t) sline + free_loc) = 0;

    // prev holds the address of the pointer referring to this sub-line,
    // shifted right by 6 bits
    if (super->last_freed[size_ind]) {
        super->last_freed[size_ind]->prev = (size_t) &sline->next >> 6;
    }
    sline->next = super->last_freed[size_ind];
    sline->prev = (size_t) &super->last_freed[size_ind] >> 6;
    super->last_freed[size_ind] = sline;
    return sline;
}
/*
    Allocate memory from a superblock.

    super   superblock to allocate from
    size    number of bytes requested

    The request is rounded up to a power of two, with a minimum of 4 bytes,
    and served in one of three ways:
      - large requests (more than 8 cache lines or more than 1/8 of a page)
        get the area following the first page of a dedicated block, so the
        returned address is page aligned;
      - requests of at least one cache line get whole cache lines from a
        block's first page;
      - smaller requests get a slot in a sub-line.

    Returns the allocated memory, or 0 if the request cannot be satisfied.
*/
void *
cmemalloc (memsuper_t *super, size_t size)
{
    size_t      ind = 0;

    // A request larger than the biggest power of two that fits in size_t
    // cannot be rounded up, and would keep the loop below from terminating.
    if (size > (~(size_t) 0 >> 1) + 1) {
        return 0;
    }
    // allocation sizes start at 4 (sizeof(float)) and go up in powers of two
    // The shift is done in size_t: a 32-bit shift overflows for requests
    // above 1GiB when size_t is wider than int.
    while (((size_t) 4 << ind) < size) {
        ind++;
    }

    if (size > MEM_LINE_SIZE * 8 || size > super->page_size / 8) {
        // the object is large enough it could cause excessive fragmentation,
        // so give it a block of its own
        memblock_t *block = block_alloc (super, (size_t) 4 << ind);
        if (!block) {
            return 0;
        }
        // the usable area starts after the block's first page
        return (void *) ((size_t) block + super->page_size);
    }

    // round size up
    size = (size_t) 4 << ind;
    if (size >= MEM_LINE_SIZE) {
        // whole cache lines are required for this object
        // convert from byte log2 to cache-line log2
        ind -= 4;
        memline_t  *line = 0;
        // take the first non-empty free chain of a sufficient size class
        while (!line && ind < MAX_CACHE_LINES) {
            line = super->free_lines[ind++];
        }
        // and the first line on that chain that is actually big enough
        while (line && line->size < size) {
            line = line->free_next;
        }
        if (!line) {
            // need a new line, one that doesn't make me fe... wrong song
            void       *mem;
            /* The cache-line pool is page aligned for two reasons:
             * 1) so it fits exactly within a page
             * 2) the control block can be found easily
             * And the reason the pool is exactly one page large is so no
             * allocated line is ever page-aligned as that would make the
             * line indistinguishable from a large block.
             */
            mem = Sys_Alloc (super->page_size);
            // NOTE(review): assumes Sys_Alloc reports failure with a null
            // pointer; confirm against its implementation.
            if (!mem) {
                return 0;
            }
            // sets super->free_lines, the block is guaranteed to be big
            // enough to hold the requested allocation as otherwise a full
            // block allocation would have been used
            memblock_t *block = init_block (super, mem, super->page_size);
            line = block->free_lines;
        }
        return alloc_line (line, size);
    }

    // the object fits in part of a cache line: take a slot from a sub-line
    void       *mem = 0;
    memsline_t **sline = &super->last_freed[ind];
    if (!*sline) {
        *sline = sline_new (super, ind);
    }
    if (*sline) {
        // list is the offset of the first free slot in units of 4 bytes
        size_t      list = (*sline)->list << 2;
        mem = (void *) ((size_t) *sline + list);
        // a free slot stores the byte offset of the next free slot
        (*sline)->list = *(uint16_t *) mem >> 2;
        if (!(*sline)->list) {
            // the sub-line is full, so remove it from the free
            // list. Freeing a block from the line will add it back
            // to the list
            memsline_t *s = *sline;
            if ((*sline)->next) {
                (*sline)->next->prev = (*sline)->prev;
            }
            *sline = (*sline)->next;
            s->next = 0;
            s->prev = 0;
        }
    }
    return mem;
}
/*
    Detach a completely free block from its superblock.

    The block is expected to have exactly one free line. If it has none, or
    more than one, a null write is performed (apparently a deliberate
    crash). The free line is unlinked from its chains and the block is then
    removed from the block list.
*/
static void
unlink_block (memblock_t *block)
{
    memline_t  *only = block->free_lines;

    if (!only || only->block_next) {
        *(int *) 0 = 0;
    }
    unlink_line (only);

    memblock_t *succ = block->next;
    if (succ) {
        succ->prev = block->prev;
    }
    *block->prev = succ;
}
/*
    Release memory obtained from cmemalloc.

    super   superblock the memory was allocated from
    mem     address returned by cmemalloc; a null pointer is ignored

    The kind of allocation is recovered from the alignment of mem:
      - not cache-line aligned: a slot inside a sub-line;
      - cache-line aligned but not page aligned: a cache line inside a
        block's first page;
      - page aligned: the area following the first page of a block.
    When a block no longer has any allocated lines and its post-page area
    is absent or free, the block is given back with Sys_Free.
*/
void
cmemfree (memsuper_t *super, void *mem)
{
    memsline_t **super_sline;
    memsline_t *sline;
    memblock_t *block;

    if (!mem) {
        // a null pointer would otherwise be taken for a large block located
        // at (0 - page_size)
        return;
    }
    if ((size_t) mem & (MEM_LINE_SIZE - 1)) {
        // sub line block
        sline = (memsline_t *) ((size_t) mem & ~(MEM_LINE_SIZE - 1));
        // push the slot on the sub-line's free list: the slot stores the
        // byte offset of the old head and becomes the new head
        *(uint16_t *) mem = sline->list << 2;
        sline->list = ((size_t) mem & (MEM_LINE_SIZE - 1)) >> 2;
        super_sline = &super->last_freed[sline->size];
        if (*super_sline != sline) {
            // move the sub-line to the head of its last_freed list,
            // unlinking it from its current position first
            if (sline->next) {
                sline->next->prev = sline->prev;
            }
            if (sline->prev) {
                // prev is the address of the referring pointer shifted
                // right by 6 bits
                *(memsline_t **) (size_t)(sline->prev << 6) = sline->next;
            }
            if (*super_sline) {
                (*super_sline)->prev = (size_t) &sline->next >> 6;
            }
            sline->next = *super_sline;
            sline->prev = (size_t) super_sline >> 6;
            (*super_sline) = sline;
        }
        return;
    } else if ((size_t) mem & super->page_mask) {
        // cache line
        block = (memblock_t *) ((size_t) mem & ~super->page_mask);
        line_free (super, block, mem);
    } else {
        // large block
        block = (memblock_t *) ((size_t) mem - super->page_size);
        block->post_free = 1;
    }
    // release the block once nothing in it is in use
    if (!block->allocated && (!block->post_size || block->post_free)) {
        unlink_block (block);
        Sys_Free (block, super->page_size + block->post_size);
    }
}
memsuper_t *
new_memsuper (void)
{
// Temporary superblock used to bootstrap a pool
memsuper_t bootstrap = { };
bootstrap.page_size = Sys_PageSize ();
bootstrap.page_mask = (bootstrap.page_size - 1);
// Allocate the real superblock from the pool. As a superblock is only
// two cache lines large (for 64-byte cache lines), it will always be
// allocated using a block's cache lines, and thus will be inside the first
// block.
memsuper_t *super = cmemalloc (&bootstrap, sizeof (*super));
*super = bootstrap;
// The block used to allocate the real superblock points to the bootstrap
// superblock, but needs to point to the real superblock.
super->memblocks->prev = &super->memblocks;
// Any free cache line block chains will also point to the bootstrap
// block instead of the resl superblock, so fix them up too (there should
// be only one, but no harm in being paranoid)
for (int i = 0; i < MAX_CACHE_LINES; i++) {
if (super->free_lines[i]) {
super->free_lines[i]->free_prev = &super->free_lines[i];
}
}
return super;
}
/*
    Destroy a superblock and give all of its memory blocks back with
    Sys_Free.

    The block holding the superblock itself is always the last block in the
    list, so it is released last, after every other block has been freed.
    The superblock must not be used after this call.
*/
void
delete_memsuper (memsuper_t *super)
{
    memblock_t *block = super->memblocks;

    // free every block except the last, keeping the list head current
    while (block && block->next) {
        memblock_t *succ = block->next;

        super->memblocks = succ;
        Sys_Free (block, super->page_size + block->post_size);
        block = succ;
    }
    // the remaining block contains the superblock
    Sys_Free (block, super->page_size + block->post_size);
}