diff --git a/include/QF/cmem.h b/include/QF/cmem.h
new file mode 100644
index 000000000..966696d6c
--- /dev/null
+++ b/include/QF/cmem.h
@@ -0,0 +1,115 @@
+/*
+	cmem.h
+
+	Cache-line aligned memory allocator
+
+	Copyright (C) 2020 Bill Currie
+
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License
+	as published by the Free Software Foundation; either version 2
+	of the License, or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with this program; if not, write to:
+
+		Free Software Foundation, Inc.
+		59 Temple Place - Suite 330
+		Boston, MA 02111-1307, USA
+
+*/
+#ifndef __cmem_h
+#define __cmem_h
+
+#include "QF/qtypes.h"
+
+#define MEM_LINE_SIZE 64
+
+typedef struct memline_s {
+	struct memline_s *next;
+	size_t      size;
+	size_t      pad[6];
+} memline_t;
+
+/* Header of a cache line that has been divided into small elements.
+ *
+ * size is the element size index (elements are 4 << size bytes), list is
+ * the offset within the line of the first free element divided by 4 (0 when
+ * no element is free), and prev is the address of the pointer referring to
+ * this line shifted right by 6 bits.
+ */
+typedef struct memsline_s {
+	struct memsline_s *next;
+	size_t      size:2;
+	size_t      list:4;
+	size_t      prev:58;	// memsline_t **
+} memsline_t;
+
+typedef struct memblock_s {
+	struct memblock_s *next;
+	struct memblock_s **prev;
+	/* The pointer to pass to free()
+	 */
+	void       *mem;
+	memline_t  *free_lines;
+	/* Size of memory region before block "header".
+	 *
+	 * Since large blocks are allocated with page-size alignment, odds are
+	 * high that there will be many cache lines "wasted" in the space
+	 * between the address returned from aligned_alloc (to cache-line
+	 * alignment) and the block itself. Setting them up as a pool makes the
+	 * lines available for smaller allocations, thus reducing waste.
+	 */
+	size_t      pre_size;
+	/* Size of memory region after block "header".
+	 *
+	 * Will be 0 for blocks that were allocated exclusively for small
+	 * allocations, otherwise indicates the size of the allocated block.
+	 */
+	size_t      post_size;
+	/* True if the post-header block is free to be reused.
+	 */
+	int         post_free;
+	int         pad;
+	size_t      pre_allocated;
+} memblock_t;
+
+typedef struct memsuper_s {
+	size_t      page_size;
+	size_t      page_mask;
+	memblock_t *memblocks;
+	/* Allocated cache lines from which smaller blocks can be allocated.
+	 *
+	 * The index is the base-2 log minus 2 of the size of the elements in the
+	 * cache line from which an element was last freed. Only 4-32 bytes are of
+	 * interest because nothing smaller than 4 bytes (int/float) will be
+	 * allocated, and 64 bytes and up consume entire cache lines.
+	 */
+	memsline_t *last_freed[4];
+	size_t      pad;
+} memsuper_t;
+
+/* Create a new allocator with no memory blocks.
+ *
+ * Returns null if the allocator itself cannot be allocated.
+ */
+memsuper_t *new_memsuper (void);
+/* Free every block owned by the allocator, then the allocator itself.
+ */
+void delete_memsuper (memsuper_t *super);
+/* Allocate size bytes, rounded up to a power of two no smaller than 4.
+ *
+ * Returns null if the memory cannot be obtained.
+ */
+void *cmemalloc (memsuper_t *super, size_t size);
+/* Release memory obtained from cmemalloc() on the same allocator.
+ */
+void cmemfree (memsuper_t *super, void *mem);
+
+#endif//__cmem_h
diff --git a/libs/util/Makemodule.am b/libs/util/Makemodule.am
index 272459d4d..af99b15d3 100644
--- a/libs/util/Makemodule.am
+++ b/libs/util/Makemodule.am
@@ -45,6 +45,7 @@ libs_util_libQFutil_la_SOURCES= \
 	libs/util/cbuf.c \
 	libs/util/checksum.c \
 	libs/util/cmd.c \
+	libs/util/cmem.c \
 	libs/util/crc.c \
 	libs/util/cvar.c \
 	libs/util/dstring.c \
diff --git a/libs/util/cmem.c b/libs/util/cmem.c
new file mode 100644
index 000000000..b446e58f0
--- /dev/null
+++ b/libs/util/cmem.c
@@ -0,0 +1,401 @@
+/*
+	cmem.c
+
+	Cache-line aligned memory allocator
+
+	Copyright (C) 2020 Bill Currie
+
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License
+	as published by the Free Software Foundation; either version 2
+	of the License, or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with this program; if not, write to:
+
+		Free Software Foundation, Inc.
+		59 Temple Place - Suite 330
+		Boston, MA 02111-1307, USA
+
+*/
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "QF/alloc.h"
+#include "QF/cmem.h"
+
+
+/* Create a new allocator with no memory blocks.
+ *
+ * Returns null if the allocator itself cannot be allocated.
+ */
+memsuper_t *
+new_memsuper (void)
+{
+	memsuper_t *super = aligned_alloc (MEM_LINE_SIZE, sizeof (*super));
+	if (!super) {
+		return 0;
+	}
+	memset (super, 0, sizeof (*super));
+	super->page_size = sysconf (_SC_PAGESIZE);
+	super->page_mask = (super->page_size - 1);
+	return super;
+}
+
+/* Free every block owned by the allocator, then the allocator itself.
+ */
+void
+delete_memsuper (memsuper_t *super)
+{
+	if (!super) {
+		return;
+	}
+	while (super->memblocks) {
+		memblock_t *t = super->memblocks;
+		super->memblocks = super->memblocks->next;
+		free (t->mem);
+	}
+	free (super);
+}
+
+/* Place a block header in newly allocated memory and link the block into
+ * the allocator's block list.
+ *
+ * The header takes the last cache line before the first page boundary above
+ * mem. The space between mem and the header (less the first cache line when
+ * mem is page aligned) becomes the block's pool of free cache lines.
+ */
+static memblock_t *
+init_block (memsuper_t *super, void *mem, size_t alloc_size)
+{
+	size_t      size = super->page_size;
+	size_t      mask = super->page_mask;
+	size_t      ptr = (size_t) mem;
+	memblock_t *block;
+
+	block = (memblock_t *) (((ptr + size) & ~mask) - sizeof (memblock_t));
+	memset (block, 0, sizeof (memblock_t));
+
+	if (super->memblocks) {
+		super->memblocks->prev = &block->next;
+	}
+	block->next = super->memblocks;
+	block->prev = &super->memblocks;
+	super->memblocks = block;
+
+	block->mem = mem;
+	block->pre_size = (size_t) block - (size_t) mem;
+	block->post_size = alloc_size - block->pre_size - sizeof (memblock_t);
+	if (!((size_t) mem & mask) && block->pre_size) {
+		// can't use the first cache line of the page as it would be
+		// indistinguishable from a large block
+		block->pre_size -= MEM_LINE_SIZE;
+	}
+	if (block->pre_size) {
+		block->free_lines = (memline_t *) ((size_t) block - block->pre_size);
+		block->free_lines->next = 0;
+		block->free_lines->size = block->pre_size;
+	}
+	return block;
+}
+
+/* Get a block whose post-header region holds at least size bytes.
+ *
+ * The smallest suitable free region is reused when there is one, otherwise
+ * a new block is allocated. Returns null if that allocation fails.
+ */
+static memblock_t *
+block_alloc (memsuper_t *super, size_t size)
+{
+	memblock_t *block;
+	memblock_t *best = 0;
+	size_t      best_size = SIZE_MAX;
+
+	for (block = super->memblocks; block; block = block->next) {
+		if (block->post_free && block->post_size >= size
+			&& block->post_size < best_size) {
+			best = block;
+			best_size = block->post_size;
+		}
+	}
+	if (best) {
+		best->post_free = 0;
+		return best;
+	}
+
+	size_t      page_size = super->page_size;
+	size_t      alloc_size = sizeof (memblock_t) + page_size + size;
+	void       *mem = aligned_alloc (MEM_LINE_SIZE, alloc_size);
+	if (!mem) {
+		return 0;
+	}
+	block = init_block (super, mem, alloc_size);
+	return block;
+}
+
+/* Carve size bytes from the smallest free run of cache lines that can hold
+ * them.
+ *
+ * Returns null if no run in the block's pool is large enough.
+ */
+static void *
+line_alloc (memblock_t *block, size_t size)
+{
+	memline_t **line = &block->free_lines;
+	memline_t **best = 0;
+	memline_t  *mem;
+	size_t      best_size = SIZE_MAX;
+
+	while (*line) {
+		if ((*line)->size >= size && (*line)->size < best_size) {
+			best_size = (*line)->size;
+			best = line;
+		}
+		line = &(*line)->next;
+	}
+	if (!best) {
+		return 0;
+	}
+	mem = *best;
+	if (size < best_size) {
+		*best = (memline_t *)((size_t) mem + size);
+		(*best)->next = mem->next;
+		(*best)->size = mem->size - size;
+	} else {
+		*best = (*best)->next;
+	}
+	block->pre_allocated += size;
+	return mem;
+}
+
+/* Return a single cache line to the block's pool, merging it with any free
+ * run that touches it.
+ */
+static void
+line_free (memblock_t *block, void *mem)
+{
+	//FIXME right now, can free only single lines (need allocated lines to
+	// have a control block)
+	size_t      size = MEM_LINE_SIZE;
+	memline_t **l;
+	memline_t  *line = 0;
+
+	block->pre_allocated -= size;
+
+	for (l = &block->free_lines; *l; l = &(*l)->next) {
+		line = *l;
+
+		if ((size_t) mem + size < (size_t) line) {
+			// line to be freed is below the free line
+			break;
+		}
+		if ((size_t) mem + size == (size_t) line) {
+			// line to be freed is immediately below the free line
+			// merge with the free line
+			size += line->size;
+			line = line->next;
+			break;
+		}
+		if ((size_t) line + line->size == (size_t) mem) {
+			// line to be freed is immediately above the free line
+			// merge with the free line
+			line->size += size;
+			if (line->next && (size_t) line->next == (size_t) mem + size) {
+				line->size += line->next->size;
+				line->next = line->next->next;
+			}
+			return;
+		}
+		// this free line lies wholly below the line being freed, so it must
+		// not be left as the freed line's successor
+		line = 0;
+	}
+	((memline_t *) mem)->next = line;
+	((memline_t *) mem)->size = size;
+	*l = mem;
+}
+
+/* Allocate a cache line, divide it into elements of 4 << size_ind bytes and
+ * make it the head of the free list for that size.
+ *
+ * Returns null if the cache line cannot be allocated.
+ */
+static memsline_t *
+sline_new (memsuper_t *super, size_t size_ind)
+{
+	size_t      size = 4 << size_ind;
+	size_t      free_loc = (sizeof (memsline_t) + size - 1) & ~(size - 1);
+	memsline_t *sline = cmemalloc (super, MEM_LINE_SIZE);
+	if (!sline) {
+		return 0;
+	}
+	sline->size = size_ind;
+	sline->list = free_loc >> 2;
+	while (free_loc + size < MEM_LINE_SIZE) {
+		*(uint16_t *)((size_t) sline + free_loc) = free_loc + size;
+		free_loc += size;
+	}
+	*(uint16_t *)((size_t) sline + free_loc) = 0;
+	if (super->last_freed[size_ind]) {
+		super->last_freed[size_ind]->prev = (size_t) &sline->next >> 6;
+	}
+	sline->next = super->last_freed[size_ind];
+	sline->prev = (size_t) &super->last_freed[size_ind] >> 6;
+	super->last_freed[size_ind] = sline;
+	return sline;
+}
+
+/* Allocate size bytes, rounded up to a power of two no smaller than 4.
+ *
+ * Returns null if the memory cannot be obtained.
+ */
+void *
+cmemalloc (memsuper_t *super, size_t size)
+{
+	size_t      ind = 0;
+	if (size > SIZE_MAX / 2) {
+		// the size cannot be rounded up to a power of two
+		return 0;
+	}
+	// allocation sizes start at 4 (sizeof(float)) and go up in powers of two
+	while (((size_t) 4 << ind) < size) {
+		ind++;
+	}
+	// round size up
+	if (size > MEM_LINE_SIZE * 8 || size > super->page_size / 8) {
+		// the object is large enough it could cause excessive fragmentation,
+		memblock_t *block = block_alloc (super, (size_t) 4 << ind);
+		if (!block) {
+			return 0;
+		}
+		return block + 1;
+	} else {
+		size = (size_t) 4 << ind;
+		if (size >= MEM_LINE_SIZE) {
+			// whole cache lines are required for this object
+			// FIXME slow
+			memblock_t *block = super->memblocks;
+			void       *mem;
+
+			while (block) {
+				if ((mem = line_alloc (block, size))) {
+					return mem;
+				}
+				block = block->next;
+			}
+			/* The cache-line pool is page aligned for two reasons:
+			 * 1) so it fits exactly within a page
+			 * 2) the control block can be found easily
+			 * And the reason the pool is exactly one page large is so no
+			 * allocated line is ever page-aligned as that would make the line
+			 * indistinguishable from a large block.
+			 */
+			mem = aligned_alloc (super->page_size, super->page_size);
+			if (!mem) {
+				return 0;
+			}
+			block = init_block (super, mem, super->page_size);
+			return line_alloc (block, size);
+		} else {
+			void       *mem = 0;
+			memsline_t **sline = &super->last_freed[ind];
+			if (!*sline) {
+				*sline = sline_new (super, ind);
+			}
+			if (*sline) {
+				size_t      list = (*sline)->list << 2;
+				mem = (void *) ((size_t) *sline + list);
+				(*sline)->list = *(uint16_t *) mem >> 2;
+				if (!(*sline)->list) {
+					// the sub-line is full, so remove it from the free
+					// list. Freeing a block from the line will add it back
+					// to the list
+					memsline_t *s = *sline;
+					if ((*sline)->next) {
+						(*sline)->next->prev = (*sline)->prev;
+					}
+					*sline = (*sline)->next;
+					s->next = 0;
+					s->prev = 0;
+				}
+			}
+			return mem;
+		}
+	}
+	return 0;
+}
+
+/* Remove the block from the allocator's block list.
+ */
+static void
+unlink_block (memblock_t *block)
+{
+	if (block->next) {
+		block->next->prev = block->prev;
+	}
+	*block->prev = block->next;
+}
+
+/* Release memory obtained from cmemalloc() on the same allocator.
+ *
+ * A null mem is ignored.
+ */
+void
+cmemfree (memsuper_t *super, void *mem)
+{
+	memsline_t **super_sline;
+	memsline_t *sline;
+	memblock_t *block;
+
+	if (!mem) {
+		return;
+	}
+	if ((size_t) mem & (MEM_LINE_SIZE - 1)) {
+		// sub line block
+		sline = (memsline_t *) ((size_t) mem & ~(MEM_LINE_SIZE - 1));
+		*(uint16_t *) mem = sline->list << 2;
+		// list holds the element's offset in units of 4 bytes
+		sline->list = ((size_t) mem & (MEM_LINE_SIZE - 1)) >> 2;
+		super_sline = &super->last_freed[sline->size];
+		if (*super_sline != sline) {
+			if (sline->next) {
+				sline->next->prev = sline->prev;
+			}
+			if (sline->prev) {
+				// widen before shifting so no address bits are dropped
+				*(memsline_t **) ((size_t) sline->prev << 6) = sline->next;
+			}
+
+			// the list is empty if no other line has a free element
+			if (*super_sline) {
+				(*super_sline)->prev = (size_t) &sline->next >> 6;
+			}
+			sline->next = *super_sline;
+			sline->prev = (size_t) super_sline >> 6;
+			(*super_sline) = sline;
+		}
+		return;
+	} else if ((size_t) mem & super->page_mask) {
+		// cache line
+		size_t      page_size = super->page_size;
+		size_t      page_mask = super->page_mask;
+		block = (memblock_t *) (((size_t) mem + page_size) & ~page_mask) - 1;
+		line_free (block, mem);
+	} else {
+		// large block
+		block = (memblock_t *) mem - 1;
+		block->post_free = 1;
+	}
+	if (!block->pre_allocated && (!block->post_size || block->post_free)) {
+		unlink_block (block);
+		free (block->mem);
+	}
+}
diff --git a/libs/util/test/Makemodule.am b/libs/util/test/Makemodule.am
index 8bbb671ad..39140bee7 100644
--- a/libs/util/test/Makemodule.am
+++ b/libs/util/test/Makemodule.am
@@ -1,5 +1,6 @@
 libs_util_tests = \
 	libs/util/test/test-bary \
+	libs/util/test/test-cmem \
 	libs/util/test/test-cs \
 	libs/util/test/test-darray \
 	libs/util/test/test-dq \
@@ -23,6 +24,10 @@ libs_util_test_test_bary_SOURCES=libs/util/test/test-bary.c
 libs_util_test_test_bary_LDADD=libs/util/libQFutil.la
 libs_util_test_test_bary_DEPENDENCIES=libs/util/libQFutil.la
 
+libs_util_test_test_cmem_SOURCES=libs/util/test/test-cmem.c
+libs_util_test_test_cmem_LDADD=libs/util/libQFutil.la
+libs_util_test_test_cmem_DEPENDENCIES=libs/util/libQFutil.la
+
 libs_util_test_test_cs_SOURCES=libs/util/test/test-cs.c
 libs_util_test_test_cs_LDADD=libs/util/libQFutil.la
 libs_util_test_test_cs_DEPENDENCIES=libs/util/libQFutil.la
diff --git a/libs/util/test/test-cmem.c b/libs/util/test/test-cmem.c
new file mode 100644
index 000000000..c8c2ee7e3
--- /dev/null
+++ b/libs/util/test/test-cmem.c
@@ -0,0 +1,363 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "QF/cmem.h"
+
+// A page-sized allocation gets a block of its own, released when freed.
+static int
+test_block (memsuper_t *super)
+{
+	size_t      size = super->page_size;
+	void       *mem = cmemalloc (super, size);
+	memblock_t *block;
+
+	if (!mem) {
+		fprintf (stderr, "could not allocate %zd byte block\n",
+				 super->page_size);
+		return 0;
+	}
+	if ((size_t) mem & super->page_mask) {
+		fprintf (stderr, "mem not page aligned: %p %zd\n",
+				 mem, super->page_size);
+		return 0;
+	}
+	block = super->memblocks;
+	if (mem != block + 1) {
+		fprintf (stderr, "super does not point to mem\n");
+		return 0;
+	}
+	if (block->post_size < size) {
+		fprintf (stderr, "block post_size too small: %zd < %zd\n",
+				 block->post_size, size);
+		return 0;
+	}
+	if (block->post_size - size >= super->page_size) {
+		fprintf (stderr, "block post_size too big: %zd < %zd\n",
+				 block->post_size - size, super->page_size);
+		return 0;
+	}
+	memset (mem, 0, size);		// valgrind check
+	cmemfree (super, mem);
+	if (super->memblocks) {
+		fprintf (stderr, "super still points to mem\n");
+		return 0;
+	}
+	return 1;
+}
+
+// Cache lines come from one pool, and freed lines merge back into it.
+static int
+test_line (memsuper_t *super)
+{
+	memline_t  *line1 = cmemalloc (super, MEM_LINE_SIZE);
+	memline_t  *line2 = cmemalloc (super, MEM_LINE_SIZE);
+	memline_t  *line3 = cmemalloc (super, MEM_LINE_SIZE);
+	memblock_t *block = super->memblocks;
+
+	if (block->next) {
+		fprintf (stderr, "too many memblocks\n");
+		return 0;
+	}
+	if (line1 < (memline_t *) block->mem || line1 >= (memline_t *) block) {
+		fprintf (stderr, "line1 outside block line pool\n");
+		return 0;
+	}
+	if (line2 < (memline_t *) block->mem || line2 >= (memline_t *) block) {
+		fprintf (stderr, "line2 outside block line pool\n");
+		return 0;
+	}
+	if (line3 < (memline_t *) block->mem || line3 >= (memline_t *) block) {
+		fprintf (stderr, "line3 outside block line pool\n");
+		return 0;
+	}
+	if (!((size_t) line1 & super->page_mask)) {
+		fprintf (stderr, "line1 is page aligned\n");
+		return 0;
+	}
+	if (!((size_t) line2 & super->page_mask)) {
+		fprintf (stderr, "line2 is page aligned\n");
+		return 0;
+	}
+	if (!((size_t) line3 & super->page_mask)) {
+		fprintf (stderr, "line3 is page aligned\n");
+		return 0;
+	}
+	if (line1 + 1 != line2 || line2 + 1 != line3) {
+		fprintf (stderr, "lines not contiguous\n");
+		return 0;
+	}
+	if (line3 + 1 != block->free_lines) {
+		fprintf (stderr, "line3 not contiguous with free lines\n");
+		return 0;
+	}
+	if (block->free_lines->next) {
+		fprintf (stderr, "multiple free line blocks\n");
+		return 0;
+	}
+	if (block->pre_allocated != 3 * MEM_LINE_SIZE) {
+		fprintf (stderr, "pre_allocated wrong size: %zd != %d\n",
+				 block->pre_allocated, 3 * MEM_LINE_SIZE);
+		return 0;
+	}
+	if (block->free_lines->size != block->pre_size - block->pre_allocated) {
+		fprintf (stderr, "free lines wrong size: %zd != %zd\n",
+				 block->free_lines->size,
+				 block->pre_size - block->pre_allocated);
+		return 0;
+	}
+	size_t      old_size = block->free_lines->size;
+	memline_t  *old_line = block->free_lines;
+	cmemfree (super, line2);
+	if (block->pre_allocated != 2 * MEM_LINE_SIZE) {
+		fprintf (stderr, "pre_allocated wrong size: %zd != %d\n",
+				 block->pre_allocated, 2 * MEM_LINE_SIZE);
+		return 0;
+	}
+	if (block->free_lines != line2) {
+		fprintf (stderr, "free lines not pointing to line2\n");
+		return 0;
+	}
+	if (!block->free_lines->next || block->free_lines->next->next) {
+		fprintf (stderr, "incorrect number of free blocks\n");
+		return 0;
+	}
+	if (line2->next != old_line || old_line->size != old_size) {
+		fprintf (stderr, "free line blocks corrupted\n");
+		return 0;
+	}
+	if (block->free_lines->size != MEM_LINE_SIZE) {
+		fprintf (stderr, "free line block wrong size: %zd != %d\n",
+				 block->free_lines->size, MEM_LINE_SIZE);
+		return 0;
+	}
+	cmemfree (super, line3);
+	if (block->free_lines != line2) {
+		fprintf (stderr, "free lines not pointing to line2 2\n");
+		return 0;
+	}
+	if (block->pre_allocated != MEM_LINE_SIZE) {
+		fprintf (stderr, "pre_allocated wrong size: %zd != %d\n",
+				 block->pre_allocated, MEM_LINE_SIZE);
+		return 0;
+	}
+	if (block->free_lines->size != block->pre_size - block->pre_allocated) {
+		fprintf (stderr, "free lines wrong size: %zd != %zd\n",
+				 block->free_lines->size,
+				 block->pre_size - block->pre_allocated);
+		return 0;
+	}
+	cmemfree (super, line1);
+	if (super->memblocks) {
+		fprintf (stderr, "line pool not freed\n");
+		return 0;
+	}
+	return 1;
+}
+
+// Sub-line allocations of each size are non-null and distinct.
+static int
+test_sline (memsuper_t *super)
+{
+	void       *mem[] = {
+		//cmemalloc (super, 2),	// smaller than min size
+		cmemalloc (super, 4),
+		cmemalloc (super, 4),
+		cmemalloc (super, 8),
+		cmemalloc (super, 8),
+		cmemalloc (super, 16),
+		cmemalloc (super, 16),
+		cmemalloc (super, 32),
+		cmemalloc (super, 32),
+	};
+#define mem_size (sizeof (mem) / sizeof (mem[0]))
+	int         fail = 0;
+	for (size_t i = 0; i < mem_size; i++) {
+		printf("%p\n", mem[i]);
+		if (!mem[i]) {
+			fprintf (stderr, "mem[%zd] is null\n", i);
+			fail = 1;
+		}
+		for (size_t j = i + 1; j < mem_size; j++) {
+			if (mem[i] == mem[j]) {
+				fprintf (stderr, "mem[%zd] is dupped with %zd\n", i, j);
+				fail = 1;
+			}
+		}
+	}
+	if (fail) {
+		return 0;
+	}
+	// freeing an element of a full sub-line puts the line back on its free
+	// list, so the element is the next one handed out for that size
+	void       *last = mem[mem_size - 1];
+	cmemfree (super, last);
+	if (cmemalloc (super, 32) != last) {
+		fprintf (stderr, "freed sub-line element not reused\n");
+		return 0;
+	}
+#undef mem_size
+	return 1;
+}
+
+// A large block and a cache line can share one block's memory.
+static int
+test_block_line (memsuper_t *super)
+{
+	void       *mem = cmemalloc (super, 2 * super->page_size);
+	void       *line;
+	memblock_t *block = super->memblocks;
+
+	if (block + 1 != (memblock_t *) mem) {
+		fprintf (stderr, "super memblocks do not point to mem\n");
+		return 0;
+	}
+	if (block->pre_size < MEM_LINE_SIZE) {
+		// need to figure out a way to guarantee a shared block
+		fprintf (stderr, "can't allocate line from block\n");
+		return 0;
+	}
+	if (block->next) {
+		fprintf (stderr, "excess blocks in super\n");
+		return 0;
+	}
+	line = cmemalloc (super, MEM_LINE_SIZE);
+	if (!((size_t) line & super->page_mask)) {
+		fprintf (stderr, "line is page aligned\n");
+		return 0;
+	}
+	if (super->memblocks->next) {
+		// need to figure out a way to guarantee a shared block
+		fprintf (stderr, "mem and line not in same block\n");
+		return 0;
+	}
+	cmemfree (super, mem);
+	if (!super->memblocks) {
+		fprintf (stderr, "shared block freed\n");
+		return 0;
+	}
+	if (cmemalloc (super, super->page_size) != mem) {
+		fprintf (stderr, "block not reused for mem\n");
+		return 0;
+	}
+	if (super->memblocks != block || super->memblocks->next) {
+		// need to figure out a way to guarantee a shared block
+		fprintf (stderr, "blocks corrupt\n");
+		return 0;
+	}
+	cmemfree (super, line);
+	if (!super->memblocks) {
+		fprintf (stderr, "shared block freed 2\n");
+		return 0;
+	}
+	cmemfree (super, mem);
+	if (super->memblocks) {
+		fprintf (stderr, "shared block not freed\n");
+		return 0;
+	}
+	return 1;
+}
+
+int
+main (void)
+{
+	memsuper_t *super = new_memsuper ();
+
+	if (!super) {
+		fprintf (stderr, "could not create super block\n");
+		return 1;
+	}
+	if (sizeof (memsuper_t) != MEM_LINE_SIZE) {
+		fprintf (stderr, "memsuper_t not cache size: %zd\n",
+				 sizeof (memsuper_t));
+		return 1;
+	}
+	if (sizeof (memline_t) != MEM_LINE_SIZE) {
+		fprintf (stderr, "memline_t not cache size: %zd\n",
+				 sizeof (memline_t));
+		return 1;
+	}
+	if (sizeof (memsline_t) != 2 * sizeof (void *)) {
+		fprintf (stderr, "memsline_t not two pointers: %zd\n",
+				 sizeof (memsline_t));
+		return 1;
+	}
+	if (sizeof (memblock_t) != MEM_LINE_SIZE) {
+		fprintf (stderr, "memblock_t not cache size: %zd\n",
+				 sizeof (memblock_t));
+		return 1;
+	}
+	if ((size_t) super & (MEM_LINE_SIZE - 1)) {
+		fprintf (stderr, "super block not cache aligned: %p\n", super);
+		return 1;
+	}
+	if (super->page_size != (size_t) sysconf (_SC_PAGESIZE)) {
+		fprintf (stderr, "page size not equal to system page size: %zd, %zd\n",
+				 super->page_size, (size_t) sysconf (_SC_PAGESIZE));
+		return 1;
+	}
+	if (!super->page_size || (super->page_size & (super->page_size - 1))) {
+		fprintf (stderr, "page size not power of two: %zd\n",
+				 super->page_size);
+		return 1;
+	}
+	if (super->page_mask + 1 != super->page_size) {
+		fprintf (stderr, "page mask not page size - 1: %zx %zx\n",
+				 super->page_mask, super->page_size);
+		return 1;
+	}
+	if (!super->page_mask || (super->page_mask & (super->page_mask + 1))) {
+		fprintf (stderr, "page mask not all 1s: %zx\n",
+				 super->page_mask);
+		return 1;
+	}
+	if (super->memblocks) {
+		fprintf (stderr, "super block list not null\n");
+		return 1;
+	}
+	if (!test_block (super)) {
+		fprintf (stderr, "block tests failed\n");
+		return 1;
+	}
+	if (super->memblocks) {
+		fprintf (stderr, "super block list not null 2\n");
+		return 1;
+	}
+	if (!test_line (super)) {
+		fprintf (stderr, "line tests failed\n");
+		return 1;
+	}
+	if (super->memblocks) {
+		fprintf (stderr, "super block list not null 2\n");
+		return 1;
+	}
+	if (!test_block_line (super)) {
+		fprintf (stderr, "block-line tests failed\n");
+		return 1;
+	}
+	for (size_t i = 0; i < 2 * super->page_size / MEM_LINE_SIZE; i++) {
+		void       *line = cmemalloc (super, MEM_LINE_SIZE);
+		if (!line) {
+			fprintf (stderr, "could not allocate %d byte line\n",
+					 MEM_LINE_SIZE);
+			return 1;
+		}
+		if ((size_t) line % MEM_LINE_SIZE) {
+			fprintf (stderr, "line not cache-line aligned: %p %d\n",
+					 line, MEM_LINE_SIZE);
+			return 1;
+		}
+		if (!((size_t) line & super->page_mask)) {
+			fprintf (stderr, "line is page aligned: %p %zd\n",
+					 line, super->page_size);
+			return 1;
+		}
+	}
+	if (!test_sline (super)) {
+		fprintf (stderr, "sub-line tests failed\n");
+		return 1;
+	}
+	delete_memsuper (super);
+	return 0;
+}