[util] Add a cache-line aligned memory allocator

This was inspired by
Hoard: A Scalable Memory Allocator
  for Multithreaded Applications

 Emery D. Berger, Kathryn S. McKinley, Robert D. Blumofe, Paul R.
 Wilson,

It's not anywhere near the same implementation, but it did take a few
basic concepts. The idea is twofold:
1) A pool of memory from which blocks can be allocated and then freed
en masse, and which is fairly efficient for small (4-16 byte) blocks
2) Thread safety for use with the Vulkan renderer (and any other
multi-threaded tasks).

However, based on the Hoard paper, small allocations are cache-line
aligned. On top of that, larger allocations are page aligned.

I suspect it would help qfvis somewhat if I ever get around to tweaking
qfvis to use cmem.
This commit is contained in:
Bill Currie 2020-12-19 17:37:22 +09:00
parent 62f3e1f428
commit af814ff9a8
5 changed files with 775 additions and 0 deletions

96
include/QF/cmem.h Normal file
View file

@ -0,0 +1,96 @@
/*
cmem.h
Cache-line aligned memory allocator
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
*/
#ifndef __cmem_h
#define __cmem_h
#include "QF/qtypes.h"
#define MEM_LINE_SIZE 64
/* Header written at the start of a run of free cache lines.
 *
 * The free runs of a block are chained together through next, starting at
 * memblock_t.free_lines. The header lives inside the free memory itself, so
 * it costs nothing while the run is unallocated.
 */
typedef struct memline_s {
/* Next free run in the same block, or null for the last run.
 */
struct memline_s *next;
/* Length of this free run in bytes, including this header.
 */
size_t size;
/* Filler. The test program checks that sizeof (memline_t) equals
 * MEM_LINE_SIZE, which holds when pointers and size_t are 8 bytes.
 */
size_t pad[6];
} memline_t;
/* Header at the start of a cache line that has been split into small
 * (sub-line) elements.
 *
 * The three bit-fields add up to 64 bits; the test program checks that the
 * whole header is the size of two pointers.
 */
typedef struct memsline_s {
/* Next sub-line in the same memsuper_t.last_freed list.
 */
struct memsline_s *next;
/* Size class of the elements in this line: each element is 4 << size bytes.
 */
size_t size:2;
/* Byte offset (from the start of the line) of the first free element,
 * divided by 4. 0 means the line has no free elements. Each free element
 * stores the byte offset of the next free element in its first 16 bits.
 */
size_t list:4;
/* Address of the pointer that points at this line, shifted right by 6 bits
 * (the pointer is expected to sit at a cache-line aligned address).
 */
size_t prev:58; // memsline_t **
} memsline_t;
/* Control header for one chunk of memory obtained from aligned_alloc.
 *
 * init_block places the header in the last sizeof (memblock_t) bytes of the
 * page in which the chunk starts, so the memory following the header begins
 * on a page boundary and the cache lines in front of it can serve as a pool
 * for line-sized allocations.
 */
typedef struct memblock_s {
/* Next block in the owning memsuper_t's block list.
 */
struct memblock_s *next;
/* Address of the pointer that points at this block: either the previous
 * block's next field or memsuper_t.memblocks.
 */
struct memblock_s **prev;
/* The pointer to pass to free()
 */
void *mem;
/* Head of the list of free cache-line runs in the region before the
 * header, or null when that region has no free lines.
 */
memline_t *free_lines;
/* Size of memory region before block "header".
 *
 * Since large blocks are allocated with page-size alignment, odds are
 * high that there will be many cache lines "wasted" in the space
 * between the address returned from aligned_alloc (to cache-line
 * alignment) and the block itself. Setting them up as a pool makes the
 * lines available for smaller allocations, thus reducing waste.
 */
size_t pre_size;
/* Size of memory region after block "header".
 *
 * Will be 0 for blocks that were allocated exclusively for small
 * allocations, otherwise indicates the size of the allocated block.
 */
size_t post_size;
/* True if the post-header block is free to be reused.
 */
int post_free;
/* Explicit padding.
 */
int pad;
/* Number of bytes of the pre-header region currently handed out:
 * increased by line_alloc and decreased by line_free.
 */
size_t pre_allocated;
} memblock_t;
/* Top-level allocator state; every allocation and free goes through one of
 * these. Created by new_memsuper and destroyed by delete_memsuper.
 */
typedef struct memsuper_s {
/* System page size as reported by sysconf (_SC_PAGESIZE).
 */
size_t page_size;
/* page_size - 1; masks the offset of an address within its page.
 */
size_t page_mask;
/* Head of the list of all blocks owned by this allocator.
 */
memblock_t *memblocks;
/* Allocated cache lines from which smaller blocks can be allocated.
 *
 * The index is the base-2 log minus 2 of the size of the elements in the
 * cache line from which an element was last freed. Only 4-32 bytes are of
 * interest because nothing smaller than 4 bytes (int/float) will be
 * allocated, and 64 bytes and up consume entire cache lines.
 */
memsline_t *last_freed[4];
/* Filler. The test program checks that sizeof (memsuper_t) equals
 * MEM_LINE_SIZE.
 */
size_t pad;
} memsuper_t;
/* Create an empty allocator.
 *
 * The returned structure is aligned to MEM_LINE_SIZE, zeroed, and has its
 * page_size and page_mask fields filled in from the system page size.
 * Release it with delete_memsuper.
 */
memsuper_t *new_memsuper (void);
/* Destroy an allocator.
 *
 * Frees every block still on super's block list and then super itself, so
 * all memory obtained from super becomes invalid.
 */
void delete_memsuper (memsuper_t *super);
/* Allocate size bytes from super.
 *
 * The size is rounded up to a power of two, with a minimum of 4 bytes.
 * Rounded sizes below MEM_LINE_SIZE are carved out of a shared cache line,
 * sizes from MEM_LINE_SIZE up to the large-object threshold get whole cache
 * lines, and anything larger gets its own block placed directly after a
 * block header.
 *
 * NOTE(review): a null pointer is returned when no block can be obtained
 * for a large request; confirm failure handling of the other paths against
 * the implementation.
 */
void *cmemalloc (memsuper_t *super, size_t size);
/* Return memory obtained from cmemalloc to super.
 *
 * The kind of allocation is deduced from the alignment of mem (not cache
 * line aligned: sub-line element; cache-line but not page aligned: cache
 * line; page aligned: large block), so mem must be exactly the pointer
 * cmemalloc returned.
 */
void cmemfree (memsuper_t *super, void *mem);
#endif//__cmem_h

View file

@ -45,6 +45,7 @@ libs_util_libQFutil_la_SOURCES= \
libs/util/cbuf.c \ libs/util/cbuf.c \
libs/util/checksum.c \ libs/util/checksum.c \
libs/util/cmd.c \ libs/util/cmd.c \
libs/util/cmem.c \
libs/util/crc.c \ libs/util/crc.c \
libs/util/cvar.c \ libs/util/cvar.c \
libs/util/dstring.c \ libs/util/dstring.c \

327
libs/util/cmem.c Normal file
View file

@ -0,0 +1,327 @@
/*
cmem.c
Cache-line aligned memory allocator
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
*/
#include <unistd.h>
#include "QF/alloc.h"
#include "QF/cmem.h"
/* Create an empty allocator.
 *
 * The control structure is allocated aligned to MEM_LINE_SIZE, zeroed, and
 * has page_size/page_mask initialised from the system page size.
 *
 * Returns the new allocator, or null if the memory for it could not be
 * obtained. Release with delete_memsuper.
 */
memsuper_t *
new_memsuper (void)
{
	memsuper_t *super = aligned_alloc (MEM_LINE_SIZE, sizeof (*super));

	if (!super) {
		// out of memory: report it to the caller instead of crashing in
		// memset
		return 0;
	}
	memset (super, 0, sizeof (*super));
	super->page_size = sysconf (_SC_PAGESIZE);
	super->page_mask = (super->page_size - 1);
	return super;
}
/* Destroy an allocator and everything allocated from it.
 *
 * Walks the block list, releasing the underlying memory of each block, then
 * releases the control structure itself.
 */
void
delete_memsuper (memsuper_t *super)
{
	memblock_t *block = super->memblocks;

	while (block) {
		// the header lives inside the memory about to be released, so grab
		// the link first
		memblock_t *following = block->next;

		super->memblocks = following;
		free (block->mem);
		block = following;
	}
	free (super);
}
/* Set up a block header inside freshly allocated memory and link it at the
 * head of super's block list.
 *
 * super       owning allocator
 * mem         start of the memory (as returned by aligned_alloc)
 * alloc_size  total size of the memory at mem
 *
 * The header is placed in the last sizeof (memblock_t) bytes of the page in
 * which mem starts. Whatever lies between mem and the header becomes the
 * block's pool of free cache lines; whatever follows the header is the
 * block's large-allocation region.
 *
 * Returns the initialised block.
 */
static memblock_t *
init_block (memsuper_t *super, void *mem, size_t alloc_size)
{
	size_t      base = (size_t) mem;
	size_t      page_end = (base + super->page_size) & ~super->page_mask;
	memblock_t *block = (memblock_t *) (page_end - sizeof (memblock_t));
	memblock_t *old_head = super->memblocks;
	size_t      pre_size;

	memset (block, 0, sizeof (*block));

	// push onto the front of the super's block list
	block->next = old_head;
	block->prev = &super->memblocks;
	if (old_head) {
		old_head->prev = &block->next;
	}
	super->memblocks = block;

	block->mem = mem;
	pre_size = (size_t) block - base;
	block->post_size = alloc_size - pre_size - sizeof (memblock_t);

	if (pre_size && !(base & super->page_mask)) {
		// can't use the first cache line of the page as it would be
		// indistinguishable from a large block
		pre_size -= MEM_LINE_SIZE;
	}
	block->pre_size = pre_size;

	if (pre_size) {
		// the whole usable pre-header region starts out as one free run
		memline_t  *pool = (memline_t *) ((size_t) block - pre_size);

		pool->next = 0;
		pool->size = pre_size;
		block->free_lines = pool;
	}
	return block;
}
/* Obtain a block whose post-header region can hold size bytes.
 *
 * A previously freed region is reused when one is large enough (best fit:
 * the smallest region that fits). Otherwise new memory is allocated with
 * room for the header, the requested size and up to a page of alignment
 * slack, and a new block is set up in it.
 *
 * Returns the block (with its post region marked in use), or null when
 * memory could not be obtained.
 */
static memblock_t *
block_alloc (memsuper_t *super, size_t size)
{
	memblock_t *block;
	memblock_t *best = 0;
	// full-width "infinity": ~0u would only be 32 bits wide when size_t is
	// 64 bits, hiding free regions of 4GiB and up from the search
	size_t      best_size = ~(size_t) 0;

	for (block = super->memblocks; block; block = block->next) {
		if (block->post_free && block->post_size >= size
			&& block->post_size < best_size) {
			best = block;
			best_size = block->post_size;
		}
	}
	if (best) {
		best->post_free = 0;
		return best;
	}

	size_t      page_size = super->page_size;
	size_t      alloc_size = sizeof (memblock_t) + page_size + size;
	if (alloc_size < size) {
		// the request is so large that adding the overhead wrapped around
		return 0;
	}
	void       *mem = aligned_alloc (MEM_LINE_SIZE, alloc_size);
	if (!mem) {
		// init_block would write through the null pointer
		return 0;
	}
	return init_block (super, mem, alloc_size);
}
/* Allocate size bytes of whole cache lines from block's pre-header pool.
 *
 * Searches the block's free runs for the smallest one that is at least size
 * bytes (best fit). The allocation is taken from the front of that run; any
 * remainder stays on the free list in the original run's position.
 *
 * size is expected to be a non-zero multiple of MEM_LINE_SIZE so that the
 * remainder's header does not overlap the memory being handed out.
 *
 * Returns the allocated memory, or null if no free run is large enough.
 */
static void *
line_alloc (memblock_t *block, size_t size)
{
	memline_t **line = &block->free_lines;
	memline_t **best = 0;
	memline_t  *mem;
	// full-width "infinity" (~0u is only 32 bits wide)
	size_t      best_size = ~(size_t) 0;

	while (*line) {
		if ((*line)->size >= size && (*line)->size < best_size) {
			best_size = (*line)->size;
			best = line;
		}
		line = &(*line)->next;
	}
	if (!best) {
		return 0;
	}
	mem = *best;
	if (size < best_size) {
		// split: the tail of the run replaces the run on the free list
		*best = (memline_t *)((size_t) mem + size);
		(*best)->next = mem->next;
		(*best)->size = mem->size - size;
	} else {
		// exact fit: drop the run from the free list
		*best = (*best)->next;
	}
	block->pre_allocated += size;
	return mem;
}
/* Return a single cache line to block's pre-header pool.
 *
 * The free list is kept sorted by address. The freed line is merged with
 * the free run directly above and/or below it when they are adjacent;
 * otherwise it is inserted as a new run at its sorted position.
 */
static void
line_free (memblock_t *block, void *mem)
{
	//FIXME right now, can free only single lines (need allocated lines to
	// have a control block)
	size_t      size = MEM_LINE_SIZE;
	memline_t **l;
	memline_t  *line = 0;

	block->pre_allocated -= size;

	for (l = &block->free_lines; *l; l = &(*l)->next) {
		line = *l;

		if ((size_t) mem + size < (size_t) line) {
			// line to be freed is below the free line
			break;
		}
		if ((size_t) mem + size == (size_t) line) {
			// line to be freed is immediately below the free line
			// merge with the free line
			size += line->size;
			line = line->next;
			break;
		}
		if ((size_t) line + line->size == (size_t) mem) {
			// line to be freed is immediately above the free line
			// merge with the free line
			line->size += size;
			if (line->next && (size_t) line->next == (size_t) mem + size) {
				line->size += line->next->size;
				line->next = line->next->next;
			}
			return;
		}
		// This run is entirely below the freed line. Forget it so that, if
		// the loop runs off the end of the list, the freed line is appended
		// with a null next pointer instead of pointing back at the last run
		// (which would make the list circular).
		line = 0;
	}
	// insert the (possibly grown) freed line in front of line
	((memline_t *) mem)->next = line;
	((memline_t *) mem)->size = size;
	*l = mem;
}
/* Create a new cache line split into small elements and make it the head of
 * the free list for its size class.
 *
 * super     owning allocator
 * size_ind  size class: elements are 4 << size_ind bytes
 *
 * The line starts with a memsline_t header; the elements following it are
 * threaded into a free chain in which each free element holds, in its first
 * 16 bits, the byte offset of the next free element (0 ends the chain).
 *
 * Returns the new line, or null if a cache line could not be allocated.
 */
static memsline_t *
sline_new (memsuper_t *super, size_t size_ind)
{
	size_t      size = (size_t) 4 << size_ind;
	// first element: just past the header, rounded up to the element size
	size_t      free_loc = (sizeof (memsline_t) + size - 1) & ~(size - 1);
	memsline_t *sline = cmemalloc (super, MEM_LINE_SIZE);

	if (!sline) {
		return 0;
	}
	sline->size = size_ind;
	sline->list = free_loc >> 2;
	// chain every element that fits in the line to its successor
	while (free_loc + size < MEM_LINE_SIZE) {
		*(uint16_t *)((size_t) sline + free_loc) = free_loc + size;
		free_loc += size;
	}
	*(uint16_t *)((size_t) sline + free_loc) = 0;

	if (super->last_freed[size_ind]) {
		super->last_freed[size_ind]->prev = (size_t) &sline->next >> 6;
	}
	sline->next = super->last_freed[size_ind];
	// The low 6 bits of the list head's address are lost here. The head of
	// a list is only ever unlinked through super->last_freed directly, never
	// through its own prev field, so the value is effectively just a
	// non-zero "is linked" marker.
	sline->prev = (size_t) &super->last_freed[size_ind] >> 6;
	super->last_freed[size_ind] = sline;
	return sline;
}
/* Allocate size bytes from super.
 *
 * The size is rounded up to a power of two (minimum 4 bytes) and served
 * according to the rounded size:
 *  - large objects (more than 8 cache lines or more than 1/8 of a page) get
 *    the page-aligned region following a block header;
 *  - objects of at least MEM_LINE_SIZE get whole cache lines from the
 *    pre-header pool of some block, a new one-page pool being created when
 *    no existing block has room;
 *  - smaller objects get one element of a cache line shared with other
 *    objects of the same size class.
 *
 * Returns the memory, or null when it could not be obtained.
 */
void *
cmemalloc (memsuper_t *super, size_t size)
{
	size_t      ind = 0;

	// A request above half the address space cannot be rounded up to a
	// power of two without overflowing (and could never be satisfied).
	if (size > (~(size_t) 0 >> 1)) {
		return 0;
	}
	// allocation sizes start at 4 (sizeof(float)) and go up in powers of two
	// (shift in size_t so the rounding works beyond the range of int)
	while (((size_t) 4 << ind) < size) {
		ind++;
	}

	if (size > MEM_LINE_SIZE * 8 || size > super->page_size / 8) {
		// the object is large enough that it could cause excessive
		// fragmentation of a line pool, so give it a block of its own
		memblock_t *block = block_alloc (super, (size_t) 4 << ind);
		if (!block) {
			return 0;
		}
		return block + 1;
	}

	// round size up
	size = (size_t) 4 << ind;
	if (size >= MEM_LINE_SIZE) {
		// whole cache lines are required for this object
		// FIXME slow
		memblock_t *block = super->memblocks;
		void       *mem;

		while (block) {
			if ((mem = line_alloc (block, size))) {
				return mem;
			}
			block = block->next;
		}
		/* The cache-line pool is page aligned for two reasons:
		 * 1) so it fits exactly within a page
		 * 2) the control block can be found easily
		 * And the reason the pool is exactly one page large is so no
		 * allocated line is ever page-aligned as that would make the line
		 * indistinguishable from a large block.
		 */
		mem = aligned_alloc (super->page_size, super->page_size);
		if (!mem) {
			// init_block would write through the null pointer
			return 0;
		}
		block = init_block (super, mem, super->page_size);
		return line_alloc (block, size);
	} else {
		void        *mem = 0;
		memsline_t **sline = &super->last_freed[ind];

		if (!*sline) {
			*sline = sline_new (super, ind);
		}
		if (*sline) {
			// pop the first free element; it holds the offset of the next
			size_t      list = (*sline)->list << 2;
			mem = (void *) ((size_t) *sline + list);
			(*sline)->list = *(uint16_t *) mem >> 2;
			if (!(*sline)->list) {
				// the sub-line is full, so remove it from the free
				// list. Freeing a block from the line will add it back
				// to the list
				memsline_t *s = *sline;
				if ((*sline)->next) {
					(*sline)->next->prev = (*sline)->prev;
				}
				*sline = (*sline)->next;
				s->next = 0;
				s->prev = 0;
			}
		}
		return mem;
	}
}
/* Remove block from the block list it is linked into.
 *
 * Only the neighbours' links are updated; block's own next/prev fields are
 * left untouched.
 */
static void
unlink_block (memblock_t *block)
{
	memblock_t  *successor = block->next;
	memblock_t **incoming = block->prev;

	if (successor) {
		successor->prev = incoming;
	}
	*incoming = successor;
}
/* Return memory obtained from cmemalloc to super.
 *
 * The kind of allocation is recovered from the alignment of mem:
 *  - not cache-line aligned: an element of a shared cache line. The element
 *    is pushed onto the line's free chain and the line is moved to the head
 *    of the free list for its size class;
 *  - cache-line aligned but not page aligned: a cache line from a block's
 *    pre-header pool;
 *  - page aligned: the large region following a block header.
 * After a cache line or large region is freed, the owning block is released
 * entirely once nothing in it remains allocated.
 *
 * A null mem is ignored.
 */
void
cmemfree (memsuper_t *super, void *mem)
{
	memsline_t **super_sline;
	memsline_t *sline;
	memblock_t *block;

	if (!mem) {
		// null is page aligned and would otherwise be taken for a large
		// block
		return;
	}
	if ((size_t) mem & (MEM_LINE_SIZE - 1)) {
		// sub line block
		sline = (memsline_t *) ((size_t) mem & ~(MEM_LINE_SIZE - 1));
		// push the element onto the line's free chain; the list field holds
		// the byte offset divided by 4 so that it fits in 4 bits
		*(uint16_t *) mem = sline->list << 2;
		sline->list = ((size_t) mem & (MEM_LINE_SIZE - 1)) >> 2;
		super_sline = &super->last_freed[sline->size];
		if (*super_sline != sline) {
			// unlink the line from wherever it is in the list (a line that
			// had been full is not linked at all: next and prev are 0)
			if (sline->next) {
				sline->next->prev = sline->prev;
			}
			if (sline->prev) {
				// widen before shifting so no address bits are dropped
				*(memsline_t **) ((size_t) sline->prev << 6) = sline->next;
			}
			// relink it at the head; the list is empty when every line of
			// this size class is full
			if (*super_sline) {
				(*super_sline)->prev = (size_t) &sline->next >> 6;
			}
			sline->next = *super_sline;
			sline->prev = (size_t) super_sline >> 6;
			(*super_sline) = sline;
		}
		return;
	} else if ((size_t) mem & super->page_mask) {
		// cache line
		size_t      page_size = super->page_size;
		size_t      page_mask = super->page_mask;
		// the block header occupies the end of the page holding the line
		block = (memblock_t *) (((size_t) mem + page_size) & ~page_mask) - 1;
		line_free (block, mem);
	} else {
		// large block
		block = (memblock_t *) mem - 1;
		block->post_free = 1;
	}
	if (!block->pre_allocated && (!block->post_size || block->post_free)) {
		// nothing in the block is in use any more
		unlink_block (block);
		free (block->mem);
	}
}

View file

@ -1,5 +1,6 @@
libs_util_tests = \ libs_util_tests = \
libs/util/test/test-bary \ libs/util/test/test-bary \
libs/util/test/test-cmem \
libs/util/test/test-cs \ libs/util/test/test-cs \
libs/util/test/test-darray \ libs/util/test/test-darray \
libs/util/test/test-dq \ libs/util/test/test-dq \
@ -23,6 +24,10 @@ libs_util_test_test_bary_SOURCES=libs/util/test/test-bary.c
libs_util_test_test_bary_LDADD=libs/util/libQFutil.la libs_util_test_test_bary_LDADD=libs/util/libQFutil.la
libs_util_test_test_bary_DEPENDENCIES=libs/util/libQFutil.la libs_util_test_test_bary_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_cmem_SOURCES=libs/util/test/test-cmem.c
libs_util_test_test_cmem_LDADD=libs/util/libQFutil.la
libs_util_test_test_cmem_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_cs_SOURCES=libs/util/test/test-cs.c libs_util_test_test_cs_SOURCES=libs/util/test/test-cs.c
libs_util_test_test_cs_LDADD=libs/util/libQFutil.la libs_util_test_test_cs_LDADD=libs/util/libQFutil.la
libs_util_test_test_cs_DEPENDENCIES=libs/util/libQFutil.la libs_util_test_test_cs_DEPENDENCIES=libs/util/libQFutil.la

346
libs/util/test/test-cmem.c Normal file
View file

@ -0,0 +1,346 @@
#include <stdio.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "QF/cmem.h"
/* Check allocation and release of a single page-sized large block.
 *
 * Expects the memory to be page aligned and to directly follow the header of
 * the newest block, the block's post region to be at least the requested
 * size but less than a page bigger, and the block to disappear from the
 * super once the memory is freed.
 *
 * Returns 1 on success, 0 on failure (after printing a diagnostic).
 */
static int
test_block (memsuper_t *super)
{
	const size_t request = super->page_size;
	void       *ptr = cmemalloc (super, request);
	memblock_t *head;
	size_t      slack;

	if (!ptr) {
		fprintf (stderr, "could not allocate %zd byte block\n",
				 super->page_size);
		return 0;
	}
	if (((size_t) ptr & super->page_mask) != 0) {
		fprintf (stderr, "mem not page aligned: %p %zd\n",
				 ptr, super->page_size);
		return 0;
	}

	head = super->memblocks;
	if (ptr != head + 1) {
		fprintf (stderr, "super does not point to mem\n");
		return 0;
	}
	if (head->post_size < request) {
		fprintf (stderr, "block post_size too small: %zd < %zd\n",
				 head->post_size, request);
		return 0;
	}
	slack = head->post_size - request;
	if (slack >= super->page_size) {
		fprintf (stderr, "block post_size too big: %zd < %zd\n",
				 slack, super->page_size);
		return 0;
	}

	memset (ptr, 0, request);		// valgrind check
	cmemfree (super, ptr);
	if (super->memblocks) {
		fprintf (stderr, "super still points to mem\n");
		return 0;
	}
	return 1;
}
/* Check allocation and release of individual cache lines.
 *
 * Allocates three lines from an empty super, inspects the resulting line
 * pool, then frees the lines in the order 2, 3, 1 while checking the state
 * of the pool's free list after each step.
 *
 * Returns 1 on success, 0 on failure (after printing a diagnostic).
 */
static int
test_line (memsuper_t *super)
{
memline_t *line1 = cmemalloc (super, MEM_LINE_SIZE);
memline_t *line2 = cmemalloc (super, MEM_LINE_SIZE);
memline_t *line3 = cmemalloc (super, MEM_LINE_SIZE);
memblock_t *block = super->memblocks;
// all three lines are expected to come from one newly created pool
if (block->next) {
fprintf (stderr, "too many memblocks\n");
return 0;
}
// each line must lie between the start of the pool memory and the header
if (line1 < (memline_t *) block->mem || line1 >= (memline_t *) block) {
fprintf (stderr, "line1 outside block line pool\n");
return 0;
}
if (line2 < (memline_t *) block->mem || line2 >= (memline_t *) block) {
fprintf (stderr, "line2 outside block line pool\n");
return 0;
}
if (line3 < (memline_t *) block->mem || line3 >= (memline_t *) block) {
fprintf (stderr, "line3 outside block line pool\n");
return 0;
}
// a page-aligned line would be mistaken for a large block by cmemfree
if (!((size_t) line1 & super->page_mask)) {
fprintf (stderr, "line1 is page aligned\n");
return 0;
}
if (!((size_t) line2 & super->page_mask)) {
fprintf (stderr, "line2 is page aligned\n");
return 0;
}
if (!((size_t) line3 & super->page_mask)) {
fprintf (stderr, "line3 is page aligned\n");
return 0;
}
// the lines are expected in increasing address order, back to back, with
// the remaining free run directly after the last one
if (line1 + 1 != line2 || line2 + 1 != line3) {
fprintf (stderr, "lines not contiguous\n");
return 0;
}
if (line3 + 1 != block->free_lines) {
fprintf (stderr, "line3 not contiguous with free lines\n");
return 0;
}
if (block->free_lines->next) {
fprintf (stderr, "multiple free line blocks\n");
return 0;
}
if (block->pre_allocated != 3 * MEM_LINE_SIZE) {
fprintf (stderr, "pre_allocated wrong size: %zd != %d\n",
block->pre_allocated, 3 * MEM_LINE_SIZE);
return 0;
}
if (block->free_lines->size != block->pre_size - block->pre_allocated) {
fprintf (stderr, "free lines wrong size: %zd != %zd\n",
block->free_lines->size,
block->pre_size - block->pre_allocated);
return 0;
}
// remember the free run as it is before anything gets freed
size_t old_size = block->free_lines->size;
memline_t *old_line = block->free_lines;
// freeing the middle line: it is not adjacent to the free run, so it is
// expected to become a separate one-line run at the head of the list
cmemfree (super, line2);
if (block->pre_allocated != 2 * MEM_LINE_SIZE) {
fprintf (stderr, "pre_allocated wrong size: %zd != %d\n",
block->pre_allocated, 2 * MEM_LINE_SIZE);
return 0;
}
if (block->free_lines != line2) {
fprintf (stderr, "free lines not pointing to line2\n");
return 0;
}
if (!block->free_lines->next || block->free_lines->next->next) {
fprintf (stderr, "incorrect number of free blocks\n");
return 0;
}
if (line2->next != old_line || old_line->size != old_size) {
fprintf (stderr, "free line blocks corrupted\n");
return 0;
}
if (block->free_lines->size != MEM_LINE_SIZE) {
fprintf (stderr, "free line block wrong size: %zd != %d\n",
block->free_lines->size, MEM_LINE_SIZE);
return 0;
}
// freeing the last line: it sits between the two free runs, so everything
// from line2 onwards is expected to merge into a single run
cmemfree (super, line3);
if (block->free_lines != line2) {
fprintf (stderr, "free lines not pointing to line2 2\n");
return 0;
}
if (block->pre_allocated != MEM_LINE_SIZE) {
fprintf (stderr, "pre_allocated wrong size: %zd != %d\n",
block->pre_allocated, MEM_LINE_SIZE);
return 0;
}
if (block->free_lines->size != block->pre_size - block->pre_allocated) {
fprintf (stderr, "free lines wrong size: %zd != %zd\n",
block->free_lines->size,
block->pre_size - block->pre_allocated);
return 0;
}
// freeing the first line leaves the pool unused, so the whole block is
// expected to be released
cmemfree (super, line1);
if (super->memblocks) {
fprintf (stderr, "line pool not freed\n");
return 0;
}
return 1;
}
/* Check sub-line (smaller than a cache line) allocations.
 *
 * Allocates two elements of each size class (4, 8, 16 and 32 bytes), prints
 * the addresses, and verifies that none is null and that no address was
 * handed out twice.
 *
 * Returns 1 on success, 0 on failure (after printing diagnostics).
 */
static int
test_sline (memsuper_t *super)
{
	// sizes below 4 are not tested: 4 bytes is the minimum element size
	static const size_t sizes[] = { 4, 4, 8, 8, 16, 16, 32, 32 };
	const size_t count = sizeof (sizes) / sizeof (sizes[0]);
	void       *mem[sizeof (sizes) / sizeof (sizes[0])];
	int         ok = 1;

	for (size_t k = 0; k < count; k++) {
		mem[k] = cmemalloc (super, sizes[k]);
	}

	for (size_t a = 0; a < count; a++) {
		printf("%p\n", mem[a]);
		if (!mem[a]) {
			fprintf (stderr, "mem[%zd] is null\n", a);
			ok = 0;
		}
		for (size_t b = a + 1; b < count; b++) {
			if (mem[a] == mem[b]) {
				fprintf (stderr, "mem[%zd] is dupped with %zd\n", a, b);
				ok = 0;
			}
		}
	}
	return ok ? 1 : 0;
}
/* Check that a large block and a cache line can share one memory block.
 *
 * Allocates a two-page large block, then a single cache line that is
 * expected to be taken from the space in front of that block's header.
 * The large region and the line are then freed and reallocated in turn to
 * verify the block is kept while either part is in use and released only
 * once both are free.
 *
 * Returns 1 on success, 0 on failure (after printing a diagnostic).
 */
static int
test_block_line (memsuper_t *super)
{
void *mem = cmemalloc (super, 2 * super->page_size);
void *line;
memblock_t *block = super->memblocks;
if (block + 1 != (memblock_t *) mem) {
fprintf (stderr, "super memblocks do not point to mem\n");
return 0;
}
// without space in front of the header there is nothing to share
if (block->pre_size < MEM_LINE_SIZE) {
// need to figure out a way to guarantee a shared block
fprintf (stderr, "can't allocate line from block\n");
return 0;
}
if (block->next) {
fprintf (stderr, "excess blocks in super\n");
return 0;
}
line = cmemalloc (super, MEM_LINE_SIZE);
if (!((size_t) line & super->page_mask)) {
fprintf (stderr, "line is page aligned\n");
return 0;
}
// a second block would mean the line got a pool of its own
if (super->memblocks->next) {
// need to figure out a way to guarantee a shared block
fprintf (stderr, "mem and line not in same block\n");
return 0;
}
// the line is still allocated, so the block must survive
cmemfree (super, mem);
if (!super->memblocks) {
fprintf (stderr, "shared block freed\n");
return 0;
}
// a smaller large allocation is expected to reuse the freed region
if (cmemalloc (super, super->page_size) != mem) {
fprintf (stderr, "block not reused for mem\n");
return 0;
}
if (super->memblocks != block || super->memblocks->next) {
// need to figure out a way to guarantee a shared block
fprintf (stderr, "blocks corrupt\n");
return 0;
}
// the large region is in use again, so the block must survive
cmemfree (super, line);
if (!super->memblocks) {
fprintf (stderr, "shared block freed 2\n");
return 0;
}
// nothing is left in use: the block is expected to be released
cmemfree (super, mem);
if (super->memblocks) {
fprintf (stderr, "shared block not freed\n");
return 0;
}
return 1;
}
/* Test driver for the cache-line allocator.
 *
 * Verifies the structure-size and page-size assumptions the allocator is
 * built on, then runs the large-block, cache-line, shared-block and
 * sub-line tests in turn against a single allocator.
 *
 * Returns 0 when everything passes, 1 at the first failure.
 */
int
main (void)
{
	memsuper_t *super = new_memsuper ();

	if (!super) {
		fprintf (stderr, "could not create memsuper\n");
		return 1;
	}
	if (sizeof (memsuper_t) != MEM_LINE_SIZE) {
		// report the size of the type actually being checked
		fprintf (stderr, "memsuper_t not cache size: %zd\n",
				 sizeof (memsuper_t));
		return 1;
	}
	if (sizeof (memline_t) != MEM_LINE_SIZE) {
		fprintf (stderr, "memline_t not cache size: %zd\n",
				 sizeof (memline_t));
		return 1;
	}
	if (sizeof (memsline_t) != 2 * sizeof (void *)) {
		fprintf (stderr, "memsline_t not two pointers: %zd\n",
				 sizeof (memsline_t));
		return 1;
	}
	if (sizeof (memblock_t) != MEM_LINE_SIZE) {
		fprintf (stderr, "memblock_t not cache size: %zd\n",
				 sizeof (memblock_t));
		return 1;
	}
	if ((size_t) super & (MEM_LINE_SIZE - 1)) {
		fprintf (stderr, "super block not cache aligned: %p\n", super);
		return 1;
	}
	if (super->page_size != (size_t) sysconf (_SC_PAGESIZE)) {
		// sysconf returns long; convert so the argument matches the format
		fprintf (stderr, "page size not equal to system page size: %zd, %zd\n",
				 super->page_size, (size_t) sysconf (_SC_PAGESIZE));
		return 1;
	}
	if (!super->page_size || (super->page_size & (super->page_size - 1))) {
		fprintf (stderr, "page size not power of two: %zd\n",
				 super->page_size);
		return 1;
	}
	if (super->page_mask + 1 != super->page_size) {
		fprintf (stderr, "page mask not page size - 1: %zx %zx\n",
				 super->page_mask, super->page_size);
		return 1;
	}
	if (!super->page_mask || (super->page_mask & (super->page_mask + 1))) {
		fprintf (stderr, "page mask not all 1s: %zx\n",
				 super->page_mask);
		return 1;
	}
	if (super->memblocks) {
		fprintf (stderr, "super block list not null\n");
		return 1;
	}

	if (!test_block (super)) {
		fprintf (stderr, "block tests failed\n");
		// a failed test must fail the program, like every other test here
		return 1;
	}
	if (super->memblocks) {
		fprintf (stderr, "super block list not null 2\n");
		return 1;
	}
	if (!test_line (super)) {
		fprintf (stderr, "line tests failed\n");
		return 1;
	}
	if (super->memblocks) {
		// numbered separately so the failing check can be identified
		fprintf (stderr, "super block list not null 3\n");
		return 1;
	}
	if (!test_block_line (super)) {
		fprintf (stderr, "block-line tests failed\n");
		return 1;
	}

	// allocate two pages worth of lines so more than one pool is needed;
	// the lines are released by delete_memsuper below
	for (size_t i = 0; i < 2 * super->page_size / MEM_LINE_SIZE; i++) {
		void       *line = cmemalloc (super, MEM_LINE_SIZE);
		if (!line) {
			fprintf (stderr, "could not allocate %d byte line\n",
					 MEM_LINE_SIZE);
			return 1;
		}
		if ((size_t) line % MEM_LINE_SIZE) {
			fprintf (stderr, "line not cache-line aligned: %p %d\n",
					 line, MEM_LINE_SIZE);
			return 1;
		}
		if (!((size_t) line & super->page_mask)) {
			fprintf (stderr, "line is page aligned: %p %zd\n",
					 line, super->page_size);
			return 1;
		}
	}
	if (!test_sline (super)) {
		fprintf (stderr, "sub-line tests failed\n");
		return 1;
	}
	delete_memsuper (super);
	return 0;
}