From dcd7c3c9fef851de8acf75d82a6ac49a77edcc61 Mon Sep 17 00:00:00 2001 From: terminx Date: Wed, 30 Sep 2009 22:19:57 +0000 Subject: [PATCH] update nedmalloc to r1116 git-svn-id: https://svn.eduke32.com/eduke32@1509 1a8010ca-5511-0410-912e-c29ae57300e0 --- polymer/eduke32/build/include/compat.h | 4 +- polymer/eduke32/build/include/malloc.c.h | 494 ++++--- polymer/eduke32/build/include/nedmalloc.h | 152 +- polymer/eduke32/build/src/nedmalloc.c | 1567 ++++++++++++--------- 4 files changed, 1275 insertions(+), 942 deletions(-) diff --git a/polymer/eduke32/build/include/compat.h b/polymer/eduke32/build/include/compat.h index 349a4e129..15220a832 100644 --- a/polymer/eduke32/build/include/compat.h +++ b/polymer/eduke32/build/include/compat.h @@ -35,7 +35,9 @@ #endif #endif -#define REPLACE_SYSTEM_ALLOCATOR +#define USE_ALLOCATOR 1 +#define REPLACE_SYSTEM_ALLOCATOR 1 +#define USE_MAGIC_HEADERS 1 #include "nedmalloc.h" #ifndef TRUE diff --git a/polymer/eduke32/build/include/malloc.c.h b/polymer/eduke32/build/include/malloc.c.h index 8da54c9bc..ebb72ee95 100644 --- a/polymer/eduke32/build/include/malloc.c.h +++ b/polymer/eduke32/build/include/malloc.c.h @@ -4,7 +4,7 @@ http://creativecommons.org/licenses/publicdomain. Send questions, comments, complaints, performance data, etc to dl@cs.oswego.edu -* Version 2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) +* Version 2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) Note: There may be an updated version of this malloc obtainable at ftp://gee.cs.oswego.edu/pub/misc/malloc.c @@ -245,8 +245,8 @@ USE_LOCKS default: 0 (false) pthread or WIN32 mutex lock/unlock. (If set true, this can be overridden on a per-mspace basis for mspace versions.) If set to a non-zero value other than 1, locks are used, but their - implementation is left out, so lock functions must be supplied manually, - as described below. + implementation is left out, so lock functions must be supplied manually, + as described below. USE_SPIN_LOCKS default: 1 iff USE_LOCKS and on x86 using gcc or MSC If true, uses custom spin locks for locking. This is currently @@ -375,7 +375,18 @@ malloc_getpagesize default: derive from system includes, or 4096. memory from the system in page-size units. This may be (and usually is) a function rather than a constant. This is ignored if WIN32, where page size is determined using getSystemInfo during - initialization. + initialization. This may be several megabytes if ENABLE_LARGE_PAGES + is enabled. + +ENABLE_LARGE_PAGES default: NOT defined + Causes the system page size to be the value of GetLargePageMinimum() + if that function is available (Windows Server 2003/Vista or later). + This allows the use of large page entries in the MMU which can + significantly improve performance in large working set applications + as TLB cache load is reduced by a factor of three. Note that enabling + this option is equal to locking the process' memory in current + implementations of Windows and requires the SE_LOCK_MEMORY_PRIVILEGE + to be held by the process in order to succeed. USE_DEV_RANDOM default: 0 (i.e., not used) Causes malloc to use /dev/random to initialize secure magic seed for @@ -405,6 +416,7 @@ LACKS_STDLIB_H default: NOT defined unless on WIN32 DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, system_info.dwAllocationGranularity in WIN32, + GetLargePageMinimum() if ENABLE_LARGE_PAGES, otherwise 64K. Also settable using mallopt(M_GRANULARITY, x) The unit for allocating and deallocating memory from the system. 
On @@ -418,6 +430,15 @@ DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, versions of malloc, the equivalent of this option was called "TOP_PAD") +DEFAULT_GRANULARITY_ALIGNED default: undefined (which means page size) + Whether to enforce alignment when allocating and deallocating memory + from the system i.e. the base address of all allocations will be + aligned to DEFAULT_GRANULARITY if it is set. Note that enabling this carries + some overhead as multiple calls must now be made when probing for a valid + aligned value, however it does greatly ease the checking for whether + a given memory pointer was allocated by this allocator rather than + some other. + DEFAULT_TRIM_THRESHOLD default: 2MB Also settable using mallopt(M_TRIM_THRESHOLD, x) The maximum amount of unused top-most memory to keep before @@ -497,6 +518,7 @@ MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP #ifdef WIN32 #define WIN32_LEAN_AND_MEAN #include +#include #define HAVE_MMAP 1 #define HAVE_MORECORE 0 #define LACKS_UNISTD_H @@ -532,12 +554,12 @@ MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP #include /* For size_t */ #endif /* LACKS_SYS_TYPES_H */ -#if (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310) -#define SPIN_LOCKS_AVAILABLE 1 -#else -#define SPIN_LOCKS_AVAILABLE 0 -#endif - +#if (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310) +#define SPIN_LOCKS_AVAILABLE 1 +#else +#define SPIN_LOCKS_AVAILABLE 0 +#endif + /* The maximum possible size_t value has all bits set */ #define MAX_SIZE_T (~(size_t)0) @@ -572,11 +594,11 @@ MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP #define USE_LOCKS 0 #endif /* USE_LOCKS */ #ifndef USE_SPIN_LOCKS -#if USE_LOCKS && SPIN_LOCKS_AVAILABLE +#if USE_LOCKS && SPIN_LOCKS_AVAILABLE #define USE_SPIN_LOCKS 1 #else #define USE_SPIN_LOCKS 0 -#endif /* USE_LOCKS && SPIN_LOCKS_AVAILABLE. */ +#endif /* USE_LOCKS && SPIN_LOCKS_AVAILABLE. */ #endif /* USE_SPIN_LOCKS */ #ifndef INSECURE #define INSECURE 0 @@ -1150,17 +1172,17 @@ size_t destroy_mspace(mspace msp); mspace create_mspace_with_base(void* base, size_t capacity, int locked); /* - mspace_track_large_chunks controls whether requests for large chunks - are allocated in their own untracked mmapped regions, separate from - others in this mspace. By default large chunks are not tracked, - which reduces fragmentation. However, such chunks are not - necessarily released to the system upon destroy_mspace. Enabling - tracking by setting to true may increase fragmentation, but avoids - leakage when relying on destroy_mspace to release all memory - allocated using this space. The function returns the previous - setting. + mspace_track_large_chunks controls whether requests for large chunks + are allocated in their own untracked mmapped regions, separate from + others in this mspace. By default large chunks are not tracked, + which reduces fragmentation. However, such chunks are not + necessarily released to the system upon destroy_mspace. Enabling + tracking by setting to true may increase fragmentation, but avoids + leakage when relying on destroy_mspace to release all memory + allocated using this space. The function returns the previous + setting. 
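  For example, a minimal sketch of the intended use (every function shown is
  declared in this header):

      mspace msp = create_mspace(0, 0);
      mspace_track_large_chunks(msp, 1);
      void* big = mspace_malloc(msp, 16u * 1024u * 1024u);
      mspace_free(msp, big);
      destroy_mspace(msp);

  With tracking enabled, destroy_mspace also releases any large mmapped
  chunks still owned by the mspace, at the cost of possibly increased
  fragmentation.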
*/ -int mspace_track_large_chunks(mspace msp, int enable); +int mspace_track_large_chunks(mspace msp, int enable); /* @@ -1262,7 +1284,7 @@ int mspace_mallopt(int, int); #endif /* MSPACES */ #ifdef __cplusplus -}; /* end of extern "C" */ +} /* end of extern "C" */ #endif /* __cplusplus */ /* @@ -1277,10 +1299,8 @@ int mspace_mallopt(int, int); /*------------------------------ internal #includes ---------------------- */ -#ifdef WIN32 -#ifndef __GNUC__ +#if defined(WIN32) && defined(_MSC_VER) #pragma warning( disable : 4146 ) /* no "unsigned" warnings */ -#endif #endif /* WIN32 */ #include /* for printing in malloc_stats */ @@ -1288,7 +1308,7 @@ int mspace_mallopt(int, int); #ifndef LACKS_ERRNO_H #include /* for MALLOC_FAILURE_ACTION */ #endif /* LACKS_ERRNO_H */ -#if FOOTERS || DEBUG +#if FOOTERS || DEBUG #include /* for magic initialization */ #endif /* FOOTERS */ #ifndef LACKS_STDLIB_H @@ -1296,7 +1316,7 @@ int mspace_mallopt(int, int); #endif /* LACKS_STDLIB_H */ #ifdef DEBUG #if ABORT_ON_ASSERT_FAILURE -#undef assert +#undef assert #define assert(x) if(!(x)) ABORT #else /* ABORT_ON_ASSERT_FAILURE */ #include @@ -1317,14 +1337,14 @@ int mspace_mallopt(int, int); #endif /* USE_BUILTIN_FFS */ #if HAVE_MMAP #ifndef LACKS_SYS_MMAN_H -/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */ -#if (defined(linux) && !defined(__USE_GNU)) -#define __USE_GNU 1 -#include /* for mmap */ -#undef __USE_GNU -#else +/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */ +#if (defined(linux) && !defined(__USE_GNU)) +#define __USE_GNU 1 #include /* for mmap */ -#endif /* linux */ +#undef __USE_GNU +#else +#include /* for mmap */ +#endif /* linux */ #endif /* LACKS_SYS_MMAN_H */ #ifndef LACKS_FCNTL_H #include @@ -1516,6 +1536,29 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\ ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) +/* + malloc_params holds global properties, including those that can be + dynamically set using mallopt. There is a single instance, mparams, + initialized in init_mparams. Note that the non-zeroness of "magic" + also serves as an initialization flag. 
+*/ + +typedef unsigned int flag_t; /* The type of various bit flag sets */ + +struct malloc_params { + volatile size_t magic; + size_t page_size; + size_t granularity; + size_t mmap_threshold; + size_t trim_threshold; + flag_t default_mflags; +}; + +static struct malloc_params mparams; + +/* Ensure mparams initialized */ +#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams()) + /* -------------------------- MMAP preliminaries ------------------------- */ /* @@ -1532,14 +1575,41 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); #if HAVE_MMAP #ifndef WIN32 -#define MUNMAP_DEFAULT(a, s) munmap((a), (s)) -#define MMAP_PROT (PROT_READ|PROT_WRITE) #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) #define MAP_ANONYMOUS MAP_ANON #endif /* MAP_ANON */ +#ifdef DEFAULT_GRANULARITY_ALIGNED +#define MMAP_IMPL mmap_aligned +static void* lastAlignedmmap; /* Used as a hint */ +static void* mmap_aligned(void *start, size_t length, int prot, int flags, int fd, off_t offset) { + void* baseaddress = 0; + void* ptr = 0; + if(!start) { + baseaddress = lastAlignedmmap; + for(;;) { + if(baseaddress) flags|=MAP_FIXED; + ptr = mmap(baseaddress, length, prot, flags, fd, offset); + if(!ptr) + baseaddress = (void*)((size_t)baseaddress + mparams.granularity); + else if((size_t)ptr & (mparams.granularity - SIZE_T_ONE)) { + munmap(ptr, length); + baseaddress = (void*)(((size_t)ptr + mparams.granularity) & ~(mparams.granularity - SIZE_T_ONE)); + } + else break; + } + } + else ptr = mmap(start, length, prot, flags, fd, offset); + if(ptr) lastAlignedmmap = (void*)((size_t) ptr + mparams.granularity); + return ptr; +} +#else +#define MMAP_IMPL mmap +#endif /* DEFAULT_GRANULARITY_ALIGNED */ +#define MUNMAP_DEFAULT(a, s) munmap((a), (s)) +#define MMAP_PROT (PROT_READ|PROT_WRITE) #ifdef MAP_ANONYMOUS #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) -#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#define MMAP_DEFAULT(s) MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) #else /* MAP_ANONYMOUS */ /* Nearly all versions of mmap support MAP_ANONYMOUS, so the following @@ -1549,8 +1619,8 @@ unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ #define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \ (dev_zero_fd = open("/dev/zero", O_RDWR), \ - mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ - mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) + MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ + MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) #endif /* MAP_ANONYMOUS */ #define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) @@ -1558,8 +1628,51 @@ static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ #else /* WIN32 */ /* Win32 MMAP via VirtualAlloc */ +#ifdef DEFAULT_GRANULARITY_ALIGNED +static void* lastWin32mmap; /* Used as a hint */ +#endif /* DEFAULT_GRANULARITY_ALIGNED */ +#ifdef ENABLE_LARGE_PAGES +static int largepagesavailable = 1; +#endif /* ENABLE_LARGE_PAGES */ static FORCEINLINE void* win32mmap(size_t size) { - void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + void* baseaddress = 0; + void* ptr = 0; +#ifdef ENABLE_LARGE_PAGES + /* Note that large pages are *always* allocated on a large page boundary. 
+ If however granularity is small then don't waste a kernel call if size + isn't around the size of a large page */ + if(largepagesavailable && size >= 1*1024*1024) { + ptr = VirtualAlloc(baseaddress, size, MEM_RESERVE|MEM_COMMIT|MEM_LARGE_PAGES, PAGE_READWRITE); + if(!ptr && ERROR_PRIVILEGE_NOT_HELD==GetLastError()) largepagesavailable=0; + } +#endif + if(!ptr) { +#ifdef DEFAULT_GRANULARITY_ALIGNED + /* We try to avoid overhead by speculatively reserving at aligned + addresses until we succeed */ + baseaddress = lastWin32mmap; + for(;;) { + void* reserveaddr = VirtualAlloc(baseaddress, size, MEM_RESERVE, PAGE_READWRITE); + if(!reserveaddr) + baseaddress = (void*)((size_t)baseaddress + mparams.granularity); + else if((size_t)reserveaddr & (mparams.granularity - SIZE_T_ONE)) { + VirtualFree(reserveaddr, 0, MEM_RELEASE); + baseaddress = (void*)(((size_t)reserveaddr + mparams.granularity) & ~(mparams.granularity - SIZE_T_ONE)); + } + else break; + } +#endif + if(!ptr) ptr = VirtualAlloc(baseaddress, size, baseaddress ? MEM_COMMIT : MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); +#if DEBUG + if(lastWin32mmap && ptr!=lastWin32mmap) printf("Non-contiguous VirtualAlloc between %p and %p\n", ptr, lastWin32mmap); +#endif +#ifdef DEFAULT_GRANULARITY_ALIGNED + if(ptr) lastWin32mmap = (void*)((size_t) ptr + mparams.granularity); +#endif + } +#if DEBUG + printf("VirtualAlloc returns %p size %u\n", ptr, size); +#endif return (ptr != 0)? ptr: MFAIL; } @@ -1685,33 +1798,33 @@ static FORCEINLINE int win32munmap(void* ptr, size_t size) { Because lock-protected regions generally have bounded times, it is OK to use the supplied simple spinlocks in the custom versions for - x86. Spinlocks are likely to improve performance for lightly - contended applications, but worsen performance under heavy - contention. + x86. Spinlocks are likely to improve performance for lightly + contended applications, but worsen performance under heavy + contention. If USE_LOCKS is > 1, the definitions of lock routines here are - bypassed, in which case you will need to define the type MLOCK_T, - and at least INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly - TRY_LOCK (which is not used in this malloc, but commonly needed in - extensions.) You must also declare a - static MLOCK_T malloc_global_mutex = { initialization values };. - + bypassed, in which case you will need to define the type MLOCK_T, + and at least INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly + TRY_LOCK (which is not used in this malloc, but commonly needed in + extensions.) You must also declare a + static MLOCK_T malloc_global_mutex = { initialization values };. 
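  For example, a minimal sketch of such definitions, assuming a platform with
  POSIX threads (the use of pthread_mutex_t is only an illustration; any lock
  with these return-value semantics will do):

      #define MLOCK_T              pthread_mutex_t
      #define INITIAL_LOCK(sl)     pthread_mutex_init(sl, NULL)   // 0 on success
      #define ACQUIRE_LOCK(sl)     pthread_mutex_lock(sl)         // 0 on success
      #define RELEASE_LOCK(sl)     pthread_mutex_unlock(sl)
      #define TRY_LOCK(sl)         (!pthread_mutex_trylock(sl))   // nonzero if acquired
      static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER;

  Note that the bundled pthread implementation below initialises its mutexes
  as recursive where the platform allows, so a recursive mutex type is the
  conservative choice here as well.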
+ */ #if USE_LOCKS == 1 -#if USE_SPIN_LOCKS && SPIN_LOCKS_AVAILABLE +#if USE_SPIN_LOCKS && SPIN_LOCKS_AVAILABLE #ifndef WIN32 /* Custom pthread-style spin locks on x86 and x64 for gcc */ struct pthread_mlock_t { volatile unsigned int l; - unsigned int c; - pthread_t threadid; + unsigned int c; + pthread_t threadid; }; -#define MLOCK_T struct pthread_mlock_t +#define MLOCK_T struct pthread_mlock_t #define CURRENT_THREAD pthread_self() -#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0) +#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0) #define ACQUIRE_LOCK(sl) pthread_acquire_lock(sl) #define RELEASE_LOCK(sl) pthread_release_lock(sl) #define TRY_LOCK(sl) pthread_try_lock(sl) @@ -1741,27 +1854,27 @@ static FORCEINLINE int pthread_acquire_lock (MLOCK_T *sl) { if (!ret) { assert(!sl->threadid); sl->threadid = CURRENT_THREAD; - sl->c = 1; + sl->c = 1; return 0; } - } - if ((++spins & SPINS_PER_YIELD) == 0) { + } + if ((++spins & SPINS_PER_YIELD) == 0) { #if defined (__SVR4) && defined (__sun) /* solaris */ - thr_yield(); + thr_yield(); #else #if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) - sched_yield(); + sched_yield(); #else /* no-op yield on unknown systems */ - ; + ; #endif /* __linux__ || __FreeBSD__ || __APPLE__ */ #endif /* solaris */ - } } } +} static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) { - volatile unsigned int* lp = &sl->l; - assert(*lp != 0); + volatile unsigned int* lp = &sl->l; + assert(*lp != 0); assert(sl->threadid == CURRENT_THREAD); if (--sl->c == 0) { sl->threadid = 0; @@ -1777,10 +1890,10 @@ static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) { static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) { volatile unsigned int* lp = &sl->l; if (*lp != 0) { - if (sl->threadid == CURRENT_THREAD) { - ++sl->c; - return 1; - } + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 1; + } } else { int cmp = 0; @@ -1793,7 +1906,7 @@ static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) { if (!ret) { assert(!sl->threadid); sl->threadid = CURRENT_THREAD; - sl->c = 1; + sl->c = 1; return 1; } } @@ -1803,15 +1916,15 @@ static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) { #else /* WIN32 */ /* Custom win32-style spin locks on x86 and x64 for MSC */ -struct win32_mlock_t { +struct win32_mlock_t { volatile long l; - unsigned int c; - long threadid; + unsigned int c; + long threadid; }; #define MLOCK_T struct win32_mlock_t -#define CURRENT_THREAD ((long)GetCurrentThreadId()) -#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0) +#define CURRENT_THREAD ((long)GetCurrentThreadId()) +#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0) #define ACQUIRE_LOCK(sl) win32_acquire_lock(sl) #define RELEASE_LOCK(sl) win32_release_lock(sl) #define TRY_LOCK(sl) win32_try_lock(sl) @@ -1823,15 +1936,14 @@ static FORCEINLINE int win32_acquire_lock (MLOCK_T *sl) { int spins = 0; for (;;) { if (sl->l != 0) { - if (sl->threadid == (signed)CURRENT_THREAD) { + if (sl->threadid == CURRENT_THREAD) { ++sl->c; return 0; } } else { if (!interlockedexchange(&sl->l, 1)) { - assert(!sl->1855 -); + assert(!sl->threadid); sl->threadid = CURRENT_THREAD; sl->c = 1; return 0; @@ -1852,11 +1964,11 @@ static FORCEINLINE void win32_release_lock (MLOCK_T *sl) { } static FORCEINLINE int win32_try_lock (MLOCK_T *sl) { - if(sl->l != 0) { - if (sl->threadid == (signed)CURRENT_THREAD) { - ++sl->c; - return 1; - } + if (sl->l != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 1; + } } else { 
if (!interlockedexchange(&sl->l, 1)){ @@ -1909,9 +2021,9 @@ static int pthread_init_lock (MLOCK_T *sl) { #define MLOCK_T CRITICAL_SECTION #define CURRENT_THREAD GetCurrentThreadId() #define INITIAL_LOCK(s) (!InitializeCriticalSectionAndSpinCount((s), 0x80000000|4000)) -#define ACQUIRE_LOCK(s) (EnterCriticalSection(sl), 0) -#define RELEASE_LOCK(s) LeaveCriticalSection(sl) -#define TRY_LOCK(s) TryEnterCriticalSection(sl) +#define ACQUIRE_LOCK(s) (EnterCriticalSection(sl), 0) +#define RELEASE_LOCK(s) LeaveCriticalSection(sl) +#define TRY_LOCK(s) TryEnterCriticalSection(sl) #define NEED_GLOBAL_LOCK_INIT static MLOCK_T malloc_global_mutex; @@ -1959,12 +2071,12 @@ static void init_malloc_global_mutex() { #endif /* USE_LOCKS */ #if USE_LOCKS -#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK +#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK #define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex); -#endif -#ifndef RELEASE_MALLOC_GLOBAL_LOCK +#endif +#ifndef RELEASE_MALLOC_GLOBAL_LOCK #define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex); -#endif +#endif #else /* USE_LOCKS */ #define ACQUIRE_MALLOC_GLOBAL_LOCK() #define RELEASE_MALLOC_GLOBAL_LOCK() @@ -2067,9 +2179,9 @@ static void init_malloc_global_mutex() { The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of the chunk size redundantly records whether the current chunk is - inuse (unless the chunk is mmapped). This redundancy enables usage - checks within free and realloc, and reduces indirection when freeing - and consolidating chunks. + inuse (unless the chunk is mmapped). This redundancy enables usage + checks within free and realloc, and reduces indirection when freeing + and consolidating chunks. Each freshly allocated chunk must have both cinuse and pinuse set. That is, each allocated chunk borders either a previously allocated @@ -2098,8 +2210,8 @@ static void init_malloc_global_mutex() { space is still allocated for it (TOP_FOOT_SIZE) to enable separation or merging when space is extended. - 3. Chunks allocated via mmap, have both cinuse and pinuse bits - cleared in their head fields. Because they are allocated + 3. Chunks allocated via mmap, have both cinuse and pinuse bits + cleared in their head fields. Because they are allocated one-by-one, each must carry its own prev_foot field, which is also used to hold the offset this chunk has within its mmapped region, which is needed to preserve alignment. Each mmapped @@ -2120,7 +2232,6 @@ typedef struct malloc_chunk* mchunkptr; typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */ typedef unsigned int bindex_t; /* Described below */ typedef unsigned int binmap_t; /* Described below */ -typedef unsigned int flag_t; /* The type of various bit flag sets */ /* ------------------- Chunks sizes and alignments ----------------------- */ @@ -2165,7 +2276,7 @@ typedef unsigned int flag_t; /* The type of various bit flag sets */ /* The head field of a chunk is or'ed with PINUSE_BIT when previous adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in - use, unless mmapped, in which case both bits are cleared. + use, unless mmapped, in which case both bits are cleared. FLAG4_BIT is not used by this malloc, but might be useful in extensions. 
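  Put another way, these two bits in p->head classify a chunk p as follows:

      CINUSE_BIT  PINUSE_BIT   state of p
          1           1        in use, previous chunk in use
          1           0        in use, previous chunk free
          0           1        free (its previous chunk is always in use,
                               because adjacent free chunks are coalesced)
          0           0        allocated via mmap

  The is_mmapped() macro below tests for the last state; is_inuse() treats
  every state except the third as in use.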
*/ @@ -2182,9 +2293,9 @@ typedef unsigned int flag_t; /* The type of various bit flag sets */ /* extraction of fields from head words */ #define cinuse(p) ((p)->head & CINUSE_BIT) #define pinuse(p) ((p)->head & PINUSE_BIT) -#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT) -#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0) - +#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT) +#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0) + #define chunksize(p) ((p)->head & ~(FLAG_BITS)) #define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) @@ -2382,7 +2493,7 @@ typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */ and so should not try to deallocate or merge with others. (This currently holds only for the initial segment passed into create_mspace_with_base.) - * If USE_MMAP_BIT set, the segment may be merged with + * If USE_MMAP_BIT set, the segment may be merged with other surrounding mmapped segments and trimmed/de-allocated using munmap. * If neither bit is set, then the segment was obtained using @@ -2397,7 +2508,7 @@ struct malloc_segment { flag_t sflags; /* mmap and extern flag */ }; -#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT) +#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT) #define is_extern_segment(S) ((S)->sflags & EXTERN_BIT) typedef struct malloc_segment msegment; @@ -2513,10 +2624,10 @@ struct malloc_state { size_t footprint; size_t max_footprint; flag_t mflags; + msegment seg; #if USE_LOCKS MLOCK_T mutex; /* locate lock among fields that rarely change */ #endif /* USE_LOCKS */ - msegment seg; void* extp; /* Unused but available for extensions */ size_t exts; }; @@ -2525,27 +2636,6 @@ typedef struct malloc_state* mstate; /* ------------- Global malloc_state and malloc_params ------------------- */ -/* - malloc_params holds global properties, including those that can be - dynamically set using mallopt. There is a single instance, mparams, - initialized in init_mparams. Note that the non-zeroness of "magic" - also serves as an initialization flag. 
-*/ - -struct malloc_params { - volatile size_t magic; - size_t page_size; - size_t granularity; - size_t mmap_threshold; - size_t trim_threshold; - flag_t default_mflags; -}; - -static struct malloc_params mparams; - -/* Ensure mparams initialized */ -#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams()) - #if !ONLY_MSPACES /* The global malloc_state used for all non-"mspace" calls */ @@ -2734,7 +2824,7 @@ static size_t traverse_and_check(mstate m); /* ---------------------------- Indexing Bins ---------------------------- */ #define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) -#define small_index(s) ((s) >> SMALLBIN_SHIFT) +#define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT) #define small_index2size(i) ((i) << SMALLBIN_SHIFT) #define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) @@ -2753,7 +2843,7 @@ static size_t traverse_and_check(mstate m); I = NTREEBINS-1;\ else {\ unsigned int K;\ - __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "g" (X));\ + __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "g" (X));\ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ }\ } @@ -2782,7 +2872,7 @@ static size_t traverse_and_check(mstate m); I = NTREEBINS-1;\ else {\ unsigned int K;\ - _BitScanReverse((DWORD *) &K, X);\ + _BitScanReverse((DWORD *) &K, (DWORD) X);\ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ }\ } @@ -2851,7 +2941,7 @@ static size_t traverse_and_check(mstate m); #define compute_bit2idx(X, I)\ {\ unsigned int J;\ - __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "g" (X));\ + __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "g" (X));\ I = (bindex_t)J;\ } @@ -2922,15 +3012,15 @@ static size_t traverse_and_check(mstate m); #define ok_address(M, a) ((char*)(a) >= (M)->least_addr) /* Check if address of next chunk n is higher than base chunk p */ #define ok_next(p, n) ((char*)(p) < (char*)(n)) -/* Check if p has inuse status */ -#define ok_inuse(p) is_inuse(p) +/* Check if p has inuse status */ +#define ok_inuse(p) is_inuse(p) /* Check if p has its pinuse bit on */ #define ok_pinuse(p) pinuse(p) #else /* !INSECURE */ #define ok_address(M, a) (1) #define ok_next(b, n) (1) -#define ok_inuse(p) (1) +#define ok_inuse(p) (1) #define ok_pinuse(p) (1) #endif /* !INSECURE */ @@ -2959,8 +3049,8 @@ static size_t traverse_and_check(mstate m); #define mark_inuse_foot(M,p,s) -/* Macros for setting head/foot of non-mmapped chunks */ - +/* Macros for setting head/foot of non-mmapped chunks */ + /* Set cinuse bit and pinuse bit of next chunk */ #define set_inuse(M,p,s)\ ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ @@ -3003,6 +3093,10 @@ static size_t traverse_and_check(mstate m); /* ---------------------------- setting mparams -------------------------- */ +#ifdef ENABLE_LARGE_PAGES +typedef size_t (WINAPI *GetLargePageMinimum_t)(void); +#endif + /* Initialize mparams */ static int init_mparams(void) { #ifdef NEED_GLOBAL_LOCK_INIT @@ -3026,6 +3120,20 @@ static int init_mparams(void) { psize = system_info.dwPageSize; gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); +#ifdef ENABLE_LARGE_PAGES + { + GetLargePageMinimum_t GetLargePageMinimum_ = (GetLargePageMinimum_t) GetProcAddress(GetModuleHandle(__T("kernel32.dll")), "GetLargePageMinimum"); + if(GetLargePageMinimum_) { + size_t largepagesize = GetLargePageMinimum_(); + if(largepagesize) { + psize = largepagesize; + gsize = ((DEFAULT_GRANULARITY != 0)? 
+ DEFAULT_GRANULARITY : largepagesize); + if(gsize < largepagesize) gsize = largepagesize; + } + } + } +#endif } #endif /* WIN32 */ @@ -3076,13 +3184,13 @@ static int init_mparams(void) { #ifdef WIN32 magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); #else - magic = (size_t)(time(0) ^ (size_t)0x55555555U); + magic = (size_t)(time(0) ^ (size_t)0x55555555U); #endif magic |= (size_t)8U; /* ensure nonzero */ magic &= ~(size_t)7U; /* improve chances of fault for bad values */ - mparams.magic = magic; + mparams.magic = magic; + } } - } RELEASE_MALLOC_GLOBAL_LOCK(); return 1; @@ -3090,9 +3198,9 @@ static int init_mparams(void) { /* support for mallopt */ static int change_mparam(int param_number, int value) { - size_t val; + size_t val; ensure_initialization(); - val = (value == -1)? MAX_SIZE_T : (size_t)value; + val = (value == -1)? MAX_SIZE_T : (size_t)value; switch(param_number) { case M_TRIM_THRESHOLD: mparams.trim_threshold = val; @@ -3138,7 +3246,7 @@ static void do_check_top_chunk(mstate m, mchunkptr p) { /* Check properties of (inuse) mmapped chunks */ static void do_check_mmapped_chunk(mstate m, mchunkptr p) { size_t sz = chunksize(p); - size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD); + size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD); assert(is_mmapped(p)); assert(use_mmap(m)); assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); @@ -3152,7 +3260,7 @@ static void do_check_mmapped_chunk(mstate m, mchunkptr p) { /* Check properties of inuse chunks */ static void do_check_inuse_chunk(mstate m, mchunkptr p) { do_check_any_chunk(m, p); - assert(is_inuse(p)); + assert(is_inuse(p)); assert(next_pinuse(p)); /* If not pinuse and not mmapped, previous chunk has OK offset */ assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); @@ -3165,7 +3273,7 @@ static void do_check_free_chunk(mstate m, mchunkptr p) { size_t sz = chunksize(p); mchunkptr next = chunk_plus_offset(p, sz); do_check_any_chunk(m, p); - assert(!is_inuse(p)); + assert(!is_inuse(p)); assert(!next_pinuse(p)); assert (!is_mmapped(p)); if (p != m->dv && p != m->top) { @@ -3174,7 +3282,7 @@ static void do_check_free_chunk(mstate m, mchunkptr p) { assert(is_aligned(chunk2mem(p))); assert(next->prev_foot == sz); assert(pinuse(p)); - assert (next == m->top || is_inuse(next)); + assert (next == m->top || is_inuse(next)); assert(p->fd->bk == p); assert(p->bk->fd == p); } @@ -3187,7 +3295,7 @@ static void do_check_free_chunk(mstate m, mchunkptr p) { static void do_check_malloced_chunk(mstate m, void* mem, size_t s) { if (mem != 0) { mchunkptr p = mem2chunk(mem); - size_t sz = p->head & ~INUSE_BITS; + size_t sz = p->head & ~INUSE_BITS; do_check_inuse_chunk(m, p); assert((sz & CHUNK_ALIGN_MASK) == 0); assert(sz >= MIN_CHUNK_SIZE); @@ -3214,7 +3322,7 @@ static void do_check_tree(mstate m, tchunkptr t) { do_check_any_chunk(m, ((mchunkptr)u)); assert(u->index == tindex); assert(chunksize(u) == tsize); - assert(!is_inuse(u)); + assert(!is_inuse(u)); assert(!next_pinuse(u)); assert(u->fd->bk == u); assert(u->bk->fd == u); @@ -3332,13 +3440,13 @@ static size_t traverse_and_check(mstate m) { while (segment_holds(s, q) && q != m->top && q->head != FENCEPOST_HEAD) { sum += chunksize(q); - if (is_inuse(q)) { + if (is_inuse(q)) { assert(!bin_find(m, q)); do_check_inuse_chunk(m, q); } else { assert(q == m->dv || bin_find(m, q)); - assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */ + assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */ do_check_free_chunk(m, q); } lastq = q; @@ -3399,7 
+3507,7 @@ static struct mallinfo internal_mallinfo(mstate m) { q != m->top && q->head != FENCEPOST_HEAD) { size_t sz = chunksize(q); sum += sz; - if (!is_inuse(q)) { + if (!is_inuse(q)) { mfree += sz; ++nfree; } @@ -3440,7 +3548,7 @@ static void internal_malloc_stats(mstate m) { mchunkptr q = align_as_chunk(s->base); while (segment_holds(s, q) && q != m->top && q->head != FENCEPOST_HEAD) { - if (!is_inuse(q)) + if (!is_inuse(q)) used -= chunksize(q); q = next_chunk(q); } @@ -3713,7 +3821,7 @@ static void internal_malloc_stats(mstate m) { the mmapped region stored in the prev_foot field of the chunk. This allows reconstruction of the required argument to MUNMAP when freed, and also allows adjustment of the returned chunk to meet alignment - requirements (especially in memalign). + requirements (especially in memalign). */ /* Malloc using mmap */ @@ -3725,13 +3833,13 @@ static void* mmap_alloc(mstate m, size_t nb) { size_t offset = align_offset(chunk2mem(mm)); size_t psize = mmsize - offset - MMAP_FOOT_PAD; mchunkptr p = (mchunkptr)(mm + offset); - p->prev_foot = offset; - p->head = psize; + p->prev_foot = offset; + p->head = psize; mark_inuse_foot(m, p, psize); chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; - if (m->least_addr == 0 || mm < m->least_addr) + if (m->least_addr == 0 || mm < m->least_addr) m->least_addr = mm; if ((m->footprint += mmsize) > m->max_footprint) m->max_footprint = m->footprint; @@ -3753,7 +3861,7 @@ static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) { (oldsize - nb) <= (mparams.granularity << 1)) return oldp; else { - size_t offset = oldp->prev_foot; + size_t offset = oldp->prev_foot; size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD; size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); char* cp = (char*)CALL_MREMAP((char*)oldp - offset, @@ -3761,7 +3869,7 @@ static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) { if (cp != CMFAIL) { mchunkptr newp = (mchunkptr)(cp + offset); size_t psize = newmmsize - offset - MMAP_FOOT_PAD; - newp->head = psize; + newp->head = psize; mark_inuse_foot(m, newp, psize); chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; @@ -3850,7 +3958,7 @@ static void* prepend_alloc(mstate m, char* newbase, char* oldbase, set_size_and_pinuse_of_free_chunk(q, dsize); } else { - if (!is_inuse(oldfirst)) { + if (!is_inuse(oldfirst)) { size_t nsize = chunksize(oldfirst); unlink_chunk(m, oldfirst, nsize); oldfirst = chunk_plus_offset(oldfirst, nsize); @@ -3928,8 +4036,8 @@ static void* sys_alloc(mstate m, size_t nb) { ensure_initialization(); - /* Directly map large chunks, but only if already initialized */ - if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) { + /* Directly map large chunks, but only if already initialized */ + if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) { void* mem = mmap_alloc(m, nb); if (mem != 0) return mem; @@ -4023,7 +4131,7 @@ static void* sys_alloc(mstate m, size_t nb) { if (mp != CMFAIL) { tbase = mp; tsize = rsize; - mmap_flag = USE_MMAP_BIT; + mmap_flag = USE_MMAP_BIT; } } } @@ -4053,9 +4161,9 @@ static void* sys_alloc(mstate m, size_t nb) { m->max_footprint = m->footprint; if (!is_initialized(m)) { /* first-time initialization */ - if (m->least_addr == 0 || tbase < m->least_addr) - m->least_addr = tbase; - m->seg.base = tbase; + if (m->least_addr == 0 || tbase < m->least_addr) + m->least_addr = tbase; + m->seg.base = tbase; 
m->seg.size = tsize; m->seg.sflags = mmap_flag; m->magic = mparams.magic; @@ -4081,7 +4189,7 @@ static void* sys_alloc(mstate m, size_t nb) { sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; if (sp != 0 && !is_extern_segment(sp) && - (sp->sflags & USE_MMAP_BIT) == mmap_flag && + (sp->sflags & USE_MMAP_BIT) == mmap_flag && segment_holds(sp, m->top)) { /* append */ sp->size += tsize; init_top(m, m->top, m->topsize + tsize); @@ -4094,7 +4202,7 @@ static void* sys_alloc(mstate m, size_t nb) { sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; if (sp != 0 && !is_extern_segment(sp) && - (sp->sflags & USE_MMAP_BIT) == mmap_flag) { + (sp->sflags & USE_MMAP_BIT) == mmap_flag) { char* oldbase = sp->base; sp->base = tbase; sp->size += tsize; @@ -4138,7 +4246,7 @@ static size_t release_unused_segments(mstate m) { mchunkptr p = align_as_chunk(base); size_t psize = chunksize(p); /* Can unmap if first chunk holds entire segment and not pinned */ - if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { + if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { tchunkptr tp = (tchunkptr)p; assert(segment_holds(sp, (char*)sp)); if (p == m->dv) { @@ -4363,7 +4471,7 @@ static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { /* Try to either shrink or extend into top. Else malloc-copy-free */ - if (RTCHECK(ok_address(m, oldp) && ok_inuse(oldp) && + if (RTCHECK(ok_address(m, oldp) && ok_inuse(oldp) && ok_next(oldp, next) && ok_pinuse(next))) { size_t nb = request2size(bytes); if (is_mmapped(oldp)) @@ -4374,7 +4482,7 @@ static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { if (rsize >= MIN_CHUNK_SIZE) { mchunkptr remainder = chunk_plus_offset(newp, nb); set_inuse(m, newp, nb); - set_inuse_and_pinuse(m, remainder, rsize); + set_inuse_and_pinuse(m, remainder, rsize); extra = chunk2mem(remainder); } } @@ -4395,11 +4503,11 @@ static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { POSTACTION(m); return 0; } -#if DEBUG - if (newp != 0) { - check_inuse_chunk(m, newp); /* Check requires lock */ - } -#endif +#if DEBUG + if (newp != 0) { + check_inuse_chunk(m, newp); /* Check requires lock */ + } +#endif POSTACTION(m); @@ -4471,7 +4579,7 @@ static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ newp->prev_foot = p->prev_foot + leadsize; - newp->head = newsize; + newp->head = newsize; } else { /* Otherwise, give back leader, use the rest */ set_inuse(m, newp, newsize); @@ -4799,12 +4907,12 @@ void dlfree(void* mem) { #endif /* FOOTERS */ if (!PREACTION(fm)) { check_inuse_chunk(fm, p); - if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { size_t psize = chunksize(p); mchunkptr next = chunk_plus_offset(p, psize); if (!pinuse(p)) { size_t prevsize = p->prev_foot; - if (is_mmapped(p)) { + if (is_mmapped(p)) { psize += prevsize + MMAP_FOOT_PAD; if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) fm->footprint -= psize; @@ -4957,7 +5065,7 @@ void* dlpvalloc(size_t bytes) { int dlmalloc_trim(size_t pad) { int result = 0; - ensure_initialization(); + ensure_initialization(); if (!PREACTION(gm)) { result = sys_trim(gm, pad); POSTACTION(gm); @@ -4992,7 +5100,7 @@ int dlmallopt(int param_number, int value) { size_t dlmalloc_usable_size(void* mem) { if (mem != 0) { mchunkptr p = mem2chunk(mem); - if (is_inuse(p)) + if (is_inuse(p)) return chunksize(p) - overhead_for(p); } return 0; @@ -5009,7 +5117,7 @@ static mstate init_user_mstate(char* 
tbase, size_t tsize) { mstate m = (mstate)(chunk2mem(msp)); memset(m, 0, msize); INITIAL_LOCK(&m->mutex); - msp->head = (msize|INUSE_BITS); + msp->head = (msize|INUSE_BITS); m->seg.base = m->least_addr = tbase; m->seg.size = m->footprint = m->max_footprint = tsize; m->magic = mparams.magic; @@ -5037,7 +5145,7 @@ mspace create_mspace(size_t capacity, int locked) { char* tbase = (char*)(CALL_MMAP(tsize)); if (tbase != CMFAIL) { m = init_user_mstate(tbase, tsize); - m->seg.sflags = USE_MMAP_BIT; + m->seg.sflags = USE_MMAP_BIT; set_lock(m, locked); } } @@ -5058,13 +5166,13 @@ mspace create_mspace_with_base(void* base, size_t capacity, int locked) { return (mspace)m; } -int mspace_track_large_chunks(mspace msp, int enable) { +int mspace_track_large_chunks(mspace msp, int enable) { int ret = 0; mstate ms = (mstate)msp; if (!PREACTION(ms)) { - if (!use_mmap(ms)) + if (!use_mmap(ms)) ret = 1; - if (!enable) + if (!enable) enable_mmap(ms); else disable_mmap(ms); @@ -5083,7 +5191,7 @@ size_t destroy_mspace(mspace msp) { size_t size = sp->size; flag_t flag = sp->sflags; sp = sp->next; - if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) && + if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) && CALL_MUNMAP(base, size) == 0) freed += size; } @@ -5219,7 +5327,7 @@ void mspace_free(mspace msp, void* mem) { mchunkptr p = mem2chunk(mem); #if FOOTERS mstate fm = get_mstate_for(p); - msp = msp; /* placate people compiling -Wunused */ + msp = msp; /* placate people compiling -Wunused */ #else /* FOOTERS */ mstate fm = (mstate)msp; #endif /* FOOTERS */ @@ -5229,12 +5337,12 @@ void mspace_free(mspace msp, void* mem) { } if (!PREACTION(fm)) { check_inuse_chunk(fm, p); - if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { size_t psize = chunksize(p); mchunkptr next = chunk_plus_offset(p, psize); if (!pinuse(p)) { size_t prevsize = p->prev_foot; - if (is_mmapped(p)) { + if (is_mmapped(p)) { psize += prevsize + MMAP_FOOT_PAD; if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) fm->footprint -= psize; @@ -5453,7 +5561,7 @@ struct mallinfo mspace_mallinfo(mspace msp) { size_t mspace_usable_size(void* mem) { if (mem != 0) { mchunkptr p = mem2chunk(mem); - if (is_inuse(p)) + if (is_inuse(p)) return chunksize(p) - overhead_for(p); } return 0; @@ -5465,6 +5573,7 @@ int mspace_mallopt(int param_number, int value) { #endif /* MSPACES */ + /* -------------------- Alternative MORECORE functions ------------------- */ /* @@ -5559,15 +5668,15 @@ int mspace_mallopt(int param_number, int value) { /* ----------------------------------------------------------------------- History: - V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) - * Use zeros instead of prev foot for is_mmapped - * Add mspace_track_large_chunks; thanks to Jean Brouwers - * Fix set_inuse in internal_realloc; thanks to Jean Brouwers + V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) + * Use zeros instead of prev foot for is_mmapped + * Add mspace_track_large_chunks; thanks to Jean Brouwers + * Fix set_inuse in internal_realloc; thanks to Jean Brouwers * Fix insufficient sys_alloc padding when using 16byte alignment * Fix bad error check in mspace_footprint - * Adaptations for ptmalloc; thanks to Wolfram Gloger. - * Reentrant spin locks; thanks to Earl Chew and others - * Win32 improvements; thanks to Niall Douglas and Earl Chew + * Adaptations for ptmalloc; thanks to Wolfram Gloger. 
+ * Reentrant spin locks; thanks to Earl Chew and others + * Win32 improvements; thanks to Niall Douglas and Earl Chew * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options * Extension hook in malloc_state * Various small adjustments to reduce warnings on some compilers @@ -5753,4 +5862,3 @@ History: */ - diff --git a/polymer/eduke32/build/include/nedmalloc.h b/polymer/eduke32/build/include/nedmalloc.h index 756188901..f228a96ae 100644 --- a/polymer/eduke32/build/include/nedmalloc.h +++ b/polymer/eduke32/build/include/nedmalloc.h @@ -1,5 +1,5 @@ /* nedalloc, an alternative malloc implementation for multiple threads without -lock contention based on dlmalloc v2.8.3. (C) 2005 Niall Douglas +lock contention based on dlmalloc v2.8.3. (C) 2005-2009 Niall Douglas Boost Software License - Version 1.0 - August 17th, 2003 @@ -29,8 +29,6 @@ DEALINGS IN THE SOFTWARE. #ifndef NEDMALLOC_H #define NEDMALLOC_H -#define THREADCACHEMAX 65536 -#define THREADCACHEMAXFREESPACE (1024*1024*4) /* See malloc.c.h for what each function does. @@ -40,19 +38,34 @@ free etc. instead of nedmalloc, nedfree etc. You may or may not want this. NO_NED_NAMESPACE prevents the functions from being defined in the nedalloc namespace when in C++ (uses the global namespace instead). -EXTSPEC can be defined to be __declspec(dllexport) or +NEDMALLOCEXTSPEC can be defined to be __declspec(dllexport) or __attribute__ ((visibility("default"))) or whatever you like. It defaults -to extern. +to extern unless NEDMALLOC_DLL_EXPORTS is set as it would be when building +nedmalloc.dll. USE_LOCKS can be 2 if you want to define your own MLOCK_T, INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK, TRY_LOCK, IS_LOCKED and NULL_LOCK_INITIALIZER. +USE_MAGIC_HEADERS causes nedalloc to allocate an extra three sizeof(size_t) +to each block. nedpfree() and nedprealloc() can then automagically know when +to free a system allocated block. Enabling this typically adds 20-50% to +application memory usage. + +USE_ALLOCATOR can be one of these settings: + 0: System allocator (nedmalloc now simply acts as a threadcache). + WARNING: Intended for DEBUG USE ONLY - not all functions work correctly. + 1: dlmalloc + */ #include /* for size_t */ -#ifndef EXTSPEC - #define EXTSPEC extern +#ifndef NEDMALLOCEXTSPEC + #ifdef NEDMALLOC_DLL_EXPORTS + #define NEDMALLOCEXTSPEC extern __declspec(dllexport) + #else + #define NEDMALLOCEXTSPEC extern + #endif #endif #if defined(_MSC_VER) && _MSC_VER>=1400 @@ -65,32 +78,44 @@ ACQUIRE_LOCK, RELEASE_LOCK, TRY_LOCK, IS_LOCKED and NULL_LOCK_INITIALIZER. 
#define NEDMALLOCPTRATTR #endif +#ifndef USE_MAGIC_HEADERS + #define USE_MAGIC_HEADERS 0 +#endif + +#ifndef USE_ALLOCATOR + #define USE_ALLOCATOR 1 /* dlmalloc */ +#endif + +#if !USE_ALLOCATOR && !USE_MAGIC_HEADERS +#error If you are using the system allocator then you MUST use magic headers +#endif + #ifdef REPLACE_SYSTEM_ALLOCATOR - #define nedmalloc malloc - #define nedcalloc calloc - #define nedrealloc realloc - #define nedfree free - #define nedmemalign memalign - #define nedmallinfo mallinfo - #define nedmallopt mallopt - #define nedmalloc_trim malloc_trim - #define nedmalloc_stats malloc_stats - #define nedmalloc_footprint malloc_footprint - #define nedindependent_calloc independent_calloc - #define nedindependent_comalloc independent_comalloc - #ifdef _MSC_VER - #define nedblksize _msize + #if USE_ALLOCATOR==0 + #error Cannot combine using the system allocator with replacing the system allocator + #endif + #ifndef WIN32 /* We have a dedidicated patcher for Windows */ + #define nedmalloc malloc + #define nedcalloc calloc + #define nedrealloc realloc + #define nedfree free + #define nedmemalign memalign + #define nedmallinfo mallinfo + #define nedmallopt mallopt + #define nedmalloc_trim malloc_trim + #define nedmalloc_stats malloc_stats + #define nedmalloc_footprint malloc_footprint + #define nedindependent_calloc independent_calloc + #define nedindependent_comalloc independent_comalloc + #ifdef _MSC_VER + #define nedblksize _msize + #endif #endif #endif -#ifndef _MSC_VER -#ifndef UNREFERENCED_PARAMETER -#define UNREFERENCED_PARAMETER(x) x=x -#endif -#endif #ifndef NO_MALLINFO -#define NO_MALLINFO 0 + #define NO_MALLINFO 0 #endif #if !NO_MALLINFO @@ -117,33 +142,36 @@ extern "C" { /* These are the global functions */ /* Gets the usable size of an allocated block. Note this will always be bigger than what was -asked for due to rounding etc. +asked for due to rounding etc. Tries to return zero if this is not a nedmalloc block (though +one could see a segfault up to 6.25% of the time). On Win32 SEH is used to guarantee that a +segfault never happens. 
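  For example, a minimal sketch of typical use (every function shown is
  declared below in this header):

      void *p = nedmalloc(100);
      size_t usable = nedblksize(p);   // at least the 100 bytes requested
      nedfree(p);

  nedblksize() returns 0 for a pointer that nedmalloc did not allocate,
  subject to the caveat above.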
*/ -EXTSPEC size_t nedblksize(void *mem) THROWSPEC; +NEDMALLOCEXTSPEC size_t nedblksize(void *mem) THROWSPEC; -EXTSPEC void nedsetvalue(void *v) THROWSPEC; +NEDMALLOCEXTSPEC void nedsetvalue(void *v) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC; -EXTSPEC void nedfree(void *mem) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC; +NEDMALLOCEXTSPEC void nedfree(void *mem) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC; #if !NO_MALLINFO -EXTSPEC struct mallinfo nedmallinfo(void) THROWSPEC; +NEDMALLOCEXTSPEC struct mallinfo nedmallinfo(void) THROWSPEC; #endif -EXTSPEC int nedmallopt(int parno, int value) THROWSPEC; -EXTSPEC int nedmalloc_trim(size_t pad) THROWSPEC; -EXTSPEC void nedmalloc_stats(void) THROWSPEC; -EXTSPEC size_t nedmalloc_footprint(void) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC; +NEDMALLOCEXTSPEC int nedmallopt(int parno, int value) THROWSPEC; +NEDMALLOCEXTSPEC void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC; +NEDMALLOCEXTSPEC int nedmalloc_trim(size_t pad) THROWSPEC; +NEDMALLOCEXTSPEC void nedmalloc_stats(void) THROWSPEC; +NEDMALLOCEXTSPEC size_t nedmalloc_footprint(void) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC; /* Destroys the system memory pool used by the functions above. Useful for when you have nedmalloc in a DLL you're about to unload. If you call ANY nedmalloc functions after calling this you will get a fatal exception! */ -EXTSPEC void neddestroysyspool(void) THROWSPEC; +NEDMALLOCEXTSPEC void neddestroysyspool() THROWSPEC; /* These are the pool functions */ struct nedpool_t; @@ -156,52 +184,50 @@ will *normally* be accessing the pool concurrently. Setting this to zero means i extends on demand, but be careful of this as it can rapidly consume system resources where bursts of concurrent threads use a pool at once. */ -EXTSPEC NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC; /* Destroys a memory pool previously created by nedcreatepool(). */ -EXTSPEC void neddestroypool(nedpool *p) THROWSPEC; +NEDMALLOCEXTSPEC void neddestroypool(nedpool *p) THROWSPEC; /* Sets a value to be associated with a pool. You can retrieve this value by passing any memory block allocated from that pool. */ -EXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC; +NEDMALLOCEXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC; /* Gets a previously set value using nedpsetvalue() or zero if memory is unknown. Optionally can also retrieve pool. 
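  A minimal sketch of how these pool functions fit together (mycontext is
  only an illustrative user pointer):

      nedpool *pool = nedcreatepool(0, 0);
      nedpsetvalue(pool, mycontext);

      void *mem = nedpmalloc(pool, 256);
      nedpool *owner = 0;
      void *v = nedgetvalue(&owner, mem);   // v == mycontext, owner == pool

      nedpfree(pool, mem);
      neddestroypool(pool);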
*/ -EXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC; +NEDMALLOCEXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC; /* Trims the thread cache for the calling thread, returning any existing cache data to the central pool. Remember to ALWAYS call with zero if you used the system pool. Setting disable to non-zero replicates neddisablethreadcache(). */ -EXTSPEC void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC; +NEDMALLOCEXTSPEC void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC; /* Disables the thread cache for the calling thread, returning any existing cache data to the central pool. Remember to ALWAYS call with zero if you used the system pool. */ -EXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC; +NEDMALLOCEXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC; -EXTSPEC void nedpfree(nedpool *p, void *mem) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC; +NEDMALLOCEXTSPEC void nedpfree(nedpool *p, void *mem) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC; #if !NO_MALLINFO -EXTSPEC struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC; +NEDMALLOCEXTSPEC struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC; #endif -EXTSPEC int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC; -EXTSPEC int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC; -EXTSPEC void nedpmalloc_stats(nedpool *p) THROWSPEC; -EXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; -EXTSPEC NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC; +NEDMALLOCEXTSPEC int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC; +NEDMALLOCEXTSPEC int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC; +NEDMALLOCEXTSPEC void nedpmalloc_stats(nedpool *p) THROWSPEC; +NEDMALLOCEXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC; #if defined(__cplusplus) } #endif -#undef EXTSPEC - #endif diff --git a/polymer/eduke32/build/src/nedmalloc.c b/polymer/eduke32/build/src/nedmalloc.c index 95e85c8f1..45a1a2cdd 100644 --- a/polymer/eduke32/build/src/nedmalloc.c +++ b/polymer/eduke32/build/src/nedmalloc.c @@ -1,5 +1,5 @@ /* Alternative malloc implementation for multiple threads without -lock contention based on dlmalloc. (C) 2005-2006 Niall Douglas +lock contention based on dlmalloc. (C) 2005-2009 Niall Douglas Boost Software License - Version 1.0 - August 17th, 2003 @@ -36,35 +36,44 @@ DEALINGS IN THE SOFTWARE. 
#endif /*#define FULLSANITYCHECKS*/ +#define USE_ALLOCATOR 1 +#define REPLACE_SYSTEM_ALLOCATOR 1 +#define USE_MAGIC_HEADERS 1 #include "nedmalloc.h" -#ifdef _WIN32 -#include -#include +#ifdef WIN32 + #include + #include #endif -#define MSPACES 1 -#define ONLY_MSPACES 1 +#if USE_ALLOCATOR==1 + #define MSPACES 1 + #define ONLY_MSPACES 1 +#endif +#define USE_DL_PREFIX 1 #ifndef USE_LOCKS -#define USE_LOCKS 1 + #define USE_LOCKS 1 #endif #define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */ #if defined(DEBUG) && !defined(_DEBUG) -#define _DEBUG + #define _DEBUG #elif !defined(NDEBUG) && !defined(DEBUG) && !defined(_DEBUG) -#define NDEBUG + #define NDEBUG #endif #undef DEBUG /* dlmalloc wants DEBUG either 0 or 1 */ #ifdef _DEBUG -#define DEBUG 1 + #define DEBUG 1 #else -#define DEBUG 0 + #define DEBUG 0 #endif #ifdef NDEBUG /* Disable assert checking on release builds */ -#undef DEBUG + #undef DEBUG #endif /* The default of 64Kb means we spend too much time kernel-side */ #ifndef DEFAULT_GRANULARITY #define DEFAULT_GRANULARITY (1*1024*1024) +#if DEBUG +#define DEFAULT_GRANULARITY_ALIGNED +#endif #endif /*#define USE_SPIN_LOCKS 0*/ @@ -72,7 +81,7 @@ DEALINGS IN THE SOFTWARE. /*#define FORCEINLINE*/ #include "malloc.c.h" #ifdef NDEBUG /* Disable assert checking on release builds */ -#undef DEBUG + #undef DEBUG #endif #if defined(__GNUC__) && defined(DEBUG) #warning DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed. @@ -88,92 +97,229 @@ DEALINGS IN THE SOFTWARE. #endif /* The maximum size to be allocated from the thread cache */ #ifndef THREADCACHEMAX -#define THREADCACHEMAX 8192 +#define THREADCACHEMAX 65536 #endif #if 1 /* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */ -#if THREADCACHEMAX == 8192 -#define THREADCACHEMAXBINS ((13-4)*2) -#elif THREADCACHEMAX == 65536 #define THREADCACHEMAXBINS ((16-4)*2) #else -#error undefined size -#endif -#else /* The number of cache entries. 
This is (topbitpos(THREADCACHEMAX)-4) */ -#if THREADCACHEMAX == 8192 -#define THREADCACHEMAXBINS (13-4) -#elif THREADCACHEMAX == 65536 #define THREADCACHEMAXBINS (16-4) -#else -#error undefined size -#endif #endif /* Point at which the free space in a thread cache is garbage collected */ #ifndef THREADCACHEMAXFREESPACE -#define THREADCACHEMAXFREESPACE (512*1024) +#define THREADCACHEMAXFREESPACE (512*1024*8) #endif #ifdef WIN32 -#define TLSVAR DWORD -#define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k)) -#define TLSFREE(k) (!TlsFree(k)) -#define TLSGET(k) TlsGetValue(k) -#define TLSSET(k, a) (!TlsSetValue(k, a)) -#ifdef DEBUG + #define TLSVAR DWORD + #define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k)) + #define TLSFREE(k) (!TlsFree(k)) + #define TLSGET(k) TlsGetValue(k) + #define TLSSET(k, a) (!TlsSetValue(k, a)) + #ifdef DEBUG static LPVOID ChkedTlsGetValue(DWORD idx) { - LPVOID ret=TlsGetValue(idx); - assert(S_OK==GetLastError()); - return ret; + LPVOID ret=TlsGetValue(idx); + assert(S_OK==GetLastError()); + return ret; } -#undef TLSGET -#define TLSGET(k) ChkedTlsGetValue(k) -#endif + #undef TLSGET + #define TLSGET(k) ChkedTlsGetValue(k) + #endif #else -#define TLSVAR pthread_key_t -#define TLSALLOC(k) pthread_key_create(k, 0) -#define TLSFREE(k) pthread_key_delete(k) -#define TLSGET(k) pthread_getspecific(k) -#define TLSSET(k, a) pthread_setspecific(k, a) + #define TLSVAR pthread_key_t + #define TLSALLOC(k) pthread_key_create(k, 0) + #define TLSFREE(k) pthread_key_delete(k) + #define TLSGET(k) pthread_getspecific(k) + #define TLSSET(k, a) pthread_setspecific(k, a) #endif -#if 0 -/* Only enable if testing with valgrind. Causes misoperation */ -#define mspace_malloc(p, s) malloc(s) -#define mspace_realloc(p, m, s) realloc(m, s) -#define mspace_calloc(p, n, s) calloc(n, s) -#define mspace_free(p, m) free(m) -#endif - - #if defined(__cplusplus) #if !defined(NO_NED_NAMESPACE) -namespace nedalloc -{ +namespace nedalloc { #else -extern "C" -{ +extern "C" { #endif #endif +static void *unsupported_operation(const char *opname) THROWSPEC +{ + fprintf(stderr, "nedmalloc: The operation %s is not supported under this build configuration\n", opname); + abort(); + return 0; +} +static size_t mspacecounter=(size_t) 0xdeadbeef; + +static FORCEINLINE void *CallMalloc(void *mspace, size_t size, size_t alignment) THROWSPEC +{ + void *ret=0; +#if USE_MAGIC_HEADERS + size_t *_ret=0; + size+=alignment+3*sizeof(size_t); +#endif +#if USE_ALLOCATOR==0 + ret=malloc(size); +#elif USE_ALLOCATOR==1 + ret=mspace_malloc((mstate) mspace, size); +#endif + if(!ret) return 0; +#if USE_MAGIC_HEADERS + _ret=(size_t *) ret; + ret=(void *)(_ret+3); + if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1)); + for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *)"NEDMALOC"; + _ret[0]=(size_t) mspace; + _ret[1]=size; +#endif + return ret; +} + +static FORCEINLINE void *CallCalloc(void *mspace, size_t no, size_t size, size_t alignment) THROWSPEC +{ + void *ret=0; +#if USE_MAGIC_HEADERS + size_t *_ret=0; + size+=alignment+3*sizeof(size_t); +#endif +#if USE_ALLOCATOR==0 + ret=calloc(no, size); +#elif USE_ALLOCATOR==1 + ret=mspace_calloc((mstate) mspace, no, size); +#endif + if(!ret) return 0; +#if USE_MAGIC_HEADERS + _ret=(size_t *) ret; + ret=(void *)(_ret+3); + if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1)); + for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC"; + _ret[0]=(size_t) mspace; + _ret[1]=size; +#endif + return ret; +} + +static FORCEINLINE void 
*CallRealloc(void *mspace, void *mem, size_t size) THROWSPEC +{ + void *ret=0; +#if USE_MAGIC_HEADERS + mstate oldmspace=0; + size_t *_ret=0, *_mem=(size_t *) mem-3, oldsize=0; + if(_mem[0]!=*(size_t *) "NEDMALOC") + { /* Transfer */ + if((ret=CallMalloc(mspace, size, 0))) + { /* It's probably safe to copy size bytes from mem - can't do much different */ +#if defined(DEBUG) + printf("*** nedmalloc frees system allocated block %p\n", mem); +#endif + memcpy(ret, mem, size); + free(mem); + } + return ret; + } + size+=3*sizeof(size_t); + oldmspace=(mstate) _mem[1]; + oldsize=_mem[2]; + for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=0); + mem=(void *)(++_mem); +#endif +#if USE_ALLOCATOR==0 + ret=realloc(mem, size); +#elif USE_ALLOCATOR==1 + ret=mspace_realloc((mstate) mspace, mem, size); +#endif + if(!ret) + { /* Put it back the way it was */ +#if USE_MAGIC_HEADERS + for(; *_mem==0; *_mem++=*(size_t *) "NEDMALOC"); +#endif + return 0; + } +#if USE_MAGIC_HEADERS + _ret=(size_t *) ret; + ret=(void *)(_ret+3); + for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC"; + _ret[0]=(size_t) mspace; + _ret[1]=size; +#endif + return ret; +} + +static FORCEINLINE void CallFree(void *mspace, void *mem) THROWSPEC +{ +#if USE_MAGIC_HEADERS + mstate oldmspace=0; + size_t *_mem=(size_t *) mem-3, oldsize=0; + if(_mem[0]!=*(size_t *) "NEDMALOC") + { +#if defined(DEBUG) + printf("*** nedmalloc frees system allocated block %p\n", mem); +#endif + free(mem); + return; + } + oldmspace=(mstate) _mem[1]; + oldsize=_mem[2]; + for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=0); + mem=(void *)(++_mem); +#endif +#if USE_ALLOCATOR==0 + free(mem); +#elif USE_ALLOCATOR==1 + mspace_free((mstate) mspace, mem); +#endif +} + size_t nedblksize(void *mem) THROWSPEC { -#if 0 - /* Only enable if testing with valgrind. Causes misoperation */ - return THREADCACHEMAX; + if(mem) + { +#if USE_MAGIC_HEADERS + size_t *_mem=(size_t *) mem-3; + if(_mem[0]==*(size_t *) "NEDMALOC") + { + mstate mspace=(mstate) _mem[1]; + size_t size=_mem[2]; + return size-3*sizeof(size_t); + } + else return 0; #else - if (mem) - { - mchunkptr p=mem2chunk(mem); - mstate fm = get_mstate_for(p); - assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */ - if (ok_magic(fm)) - return chunksize(p)-overhead_for(p); - } - return 0; +#if USE_ALLOCATOR==0 + /* Fail everything */ + return 0; +#elif USE_ALLOCATOR==1 +#ifdef WIN32 + __try #endif + { + /* We try to return zero here if it isn't one of our own blocks, however + the current block annotation scheme used by dlmalloc makes it impossible + to be absolutely sure of avoiding a segfault. + + mchunkptr->prev_foot = mem-(2*size_t) = mstate ^ mparams.magic for PRECEDING block; + mchunkptr->head = mem-(1*size_t) = 8 multiple size of this block with bottom three bits = FLAG_BITS + */ + mchunkptr p=mem2chunk(mem); + mstate fm=0; + if(!is_inuse(p)) return 0; + /* The following isn't safe but is probably true: unlikely to allocate + a 2Gb block on a 32bit system or a 8Eb block on a 64 bit system */ + if(p->head & ((size_t)1)<<(SIZE_T_BITSIZE-SIZE_T_ONE)) return 0; + /* We have now reduced our chances of being wrong to 0.5^4 = 6.25%. + We could start comparing prev_foot's for similarity but it starts getting slow. 
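       As a final guard, the ok_magic() probe below is wrapped in __try/__except
       on Windows so that dereferencing a garbage mstate pointer degrades into
       returning zero instead of faulting; on other platforms a foreign pointer
       that survives the checks above can, in principle, still crash here.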
*/ + fm = get_mstate_for(p); + assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */ + if(ok_magic(fm)) + return chunksize(p)-overhead_for(p); + } +#ifdef WIN32 + __except(1) { } +#endif +#endif +#endif + } + return 0; } void nedsetvalue(void *v) THROWSPEC { nedpsetvalue((nedpool *) 0, v); } @@ -197,378 +343,367 @@ typedef struct threadcacheblk_t threadcacheblk; struct threadcacheblk_t { /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */ #ifdef FULLSANITYCHECKS - unsigned int magic; + unsigned int magic; #endif - unsigned int lastUsed, size; - threadcacheblk *next, *prev; + unsigned int lastUsed, size; + threadcacheblk *next, *prev; }; typedef struct threadcache_t { #ifdef FULLSANITYCHECKS - unsigned int magic1; + unsigned int magic1; #endif - int mymspace; /* Last mspace entry this thread used */ - long threadid; - unsigned int mallocs, frees, successes; - size_t freeInCache; /* How much free space is stored in this cache */ - threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2]; + int mymspace; /* Last mspace entry this thread used */ + long threadid; + unsigned int mallocs, frees, successes; + size_t freeInCache; /* How much free space is stored in this cache */ + threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2]; #ifdef FULLSANITYCHECKS - unsigned int magic2; + unsigned int magic2; #endif } threadcache; struct nedpool_t { - MLOCK_T mutex; - void *uservalue; - int threads; /* Max entries in m to use */ - threadcache *caches[THREADCACHEMAXCACHES]; - TLSVAR mycache; /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */ - mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */ + MLOCK_T mutex; + void *uservalue; + int threads; /* Max entries in m to use */ + threadcache *caches[THREADCACHEMAXCACHES]; + TLSVAR mycache; /* Thread cache for this thread. 
0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */ + mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */ }; static nedpool syspool; static FORCEINLINE unsigned int size2binidx(size_t _size) THROWSPEC { /* 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */ - unsigned int topbit, size=(unsigned int)(_size>>4); - /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */ + unsigned int topbit, size=(unsigned int)(_size>>4); + /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */ #if defined(__GNUC__) - topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size); + topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size); #elif defined(_MSC_VER) && _MSC_VER>=1300 - { - unsigned long bsrTopBit; + { + unsigned long bsrTopBit; - _BitScanReverse(&bsrTopBit, size); + _BitScanReverse(&bsrTopBit, size); - topbit = bsrTopBit; - } + topbit = bsrTopBit; + } #else #if 0 - union - { - unsigned asInt[2]; - double asDouble; - }; - int n; + union { + unsigned asInt[2]; + double asDouble; + }; + int n; - asDouble = (double)size + 0.5; - topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023; + asDouble = (double)size + 0.5; + topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023; #else - { - unsigned int x=size; - x = x | (x >> 1); - x = x | (x >> 2); - x = x | (x >> 4); - x = x | (x >> 8); - x = x | (x >>16); - x = ~x; - x = x - ((x >> 1) & 0x55555555); - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - x = (x + (x >> 4)) & 0x0F0F0F0F; - x = x + (x << 8); - x = x + (x << 16); - topbit=31 - (x >> 24); - } + { + unsigned int x=size; + x = x | (x >> 1); + x = x | (x >> 2); + x = x | (x >> 4); + x = x | (x >> 8); + x = x | (x >>16); + x = ~x; + x = x - ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x + (x >> 4)) & 0x0F0F0F0F; + x = x + (x << 8); + x = x + (x << 16); + topbit=31 - (x >> 24); + } #endif #endif - return topbit; + return topbit; } #ifdef FULLSANITYCHECKS static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC { - assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1])); - if (ptr[0] && ptr[1]) - { - assert(nedblksize(ptr[0])>=sizeof(threadcacheblk)); - assert(nedblksize(ptr[1])>=sizeof(threadcacheblk)); - assert(*(unsigned int *) "NEDN"==ptr[0]->magic); - assert(*(unsigned int *) "NEDN"==ptr[1]->magic); - assert(!ptr[0]->prev); - assert(!ptr[1]->next); - if (ptr[0]==ptr[1]) - { - assert(!ptr[0]->next); - assert(!ptr[1]->prev); - } - } + assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1])); + if(ptr[0] && ptr[1]) + { + assert(nedblksize(ptr[0])>=sizeof(threadcacheblk)); + assert(nedblksize(ptr[1])>=sizeof(threadcacheblk)); + assert(*(unsigned int *) "NEDN"==ptr[0]->magic); + assert(*(unsigned int *) "NEDN"==ptr[1]->magic); + assert(!ptr[0]->prev); + assert(!ptr[1]->next); + if(ptr[0]==ptr[1]) + { + assert(!ptr[0]->next); + assert(!ptr[1]->prev); + } + } } static void tcfullsanitycheck(threadcache *tc) THROWSPEC { - threadcacheblk **tcbptr=tc->bins; - int n; - for (n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2) - { - threadcacheblk *b, *ob=0; - tcsanitycheck(tcbptr); - for (b=tcbptr[0]; b; ob=b, b=b->next) - { - assert(*(unsigned int *) "NEDN"==b->magic); - assert(!ob || ob->next==b); - assert(!ob || b->prev==ob); - } - } + threadcacheblk **tcbptr=tc->bins; + int n; + for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2) + { + threadcacheblk *b, *ob=0; + tcsanitycheck(tcbptr); + for(b=tcbptr[0]; b; ob=b, b=b->next) + { + assert(*(unsigned int *) "NEDN"==b->magic); + assert(!ob || ob->next==b); + 
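      /* Note: each bin is a doubly linked list with its head at bins[idx*2] and
         its tail at bins[idx*2+1]; the asserts on either side of this point
         check that every block's next/prev links mirror each other as the list
         is walked (ob is the previously visited block). */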
assert(!ob || b->prev==ob); + } + } } #endif static NOINLINE void RemoveCacheEntries(nedpool *p, threadcache *tc, unsigned int age) THROWSPEC { - UNREFERENCED_PARAMETER(p); #ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); + tcfullsanitycheck(tc); #endif - if (tc->freeInCache) - { - threadcacheblk **tcbptr=tc->bins; - int n; - for (n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2) - { - threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */ - /*tcsanitycheck(tcbptr);*/ - for (; *tcb && tc->frees-(*tcb)->lastUsed>=age;) - { - threadcacheblk *f=*tcb; - size_t blksize=f->size; /*nedblksize(f);*/ - assert(blksize<=nedblksize(f)); - assert(blksize); + if(tc->freeInCache) + { + threadcacheblk **tcbptr=tc->bins; + int n; + for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2) + { + threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */ + /*tcsanitycheck(tcbptr);*/ + for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; ) + { + threadcacheblk *f=*tcb; + size_t blksize=f->size; /*nedblksize(f);*/ + assert(blksize<=nedblksize(f)); + assert(blksize); #ifdef FULLSANITYCHECKS - assert(*(unsigned int *) "NEDN"==(*tcb)->magic); + assert(*(unsigned int *) "NEDN"==(*tcb)->magic); #endif - *tcb=(*tcb)->prev; - if (*tcb) - (*tcb)->next=0; - else - *tcbptr=0; - tc->freeInCache-=blksize; - assert((long) tc->freeInCache>=0); - mspace_free(0, f); - /*tcsanitycheck(tcbptr);*/ - } - } - } + *tcb=(*tcb)->prev; + if(*tcb) + (*tcb)->next=0; + else + *tcbptr=0; + tc->freeInCache-=blksize; + assert((long) tc->freeInCache>=0); + CallFree(0, f); + /*tcsanitycheck(tcbptr);*/ + } + } + } #ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); + tcfullsanitycheck(tc); #endif } static void DestroyCaches(nedpool *p) THROWSPEC { - if (p->caches) - { - threadcache *tc; - int n; - for (n=0; ncaches[n])) - { - tc->frees++; - RemoveCacheEntries(p, tc, 0); - assert(!tc->freeInCache); - tc->mymspace=-1; - tc->threadid=0; - mspace_free(0, tc); - p->caches[n]=0; - } - } - } + if(p->caches) + { + threadcache *tc; + int n; + for(n=0; ncaches[n])) + { + tc->frees++; + RemoveCacheEntries(p, tc, 0); + assert(!tc->freeInCache); + tc->mymspace=-1; + tc->threadid=0; + CallFree(0, tc); + p->caches[n]=0; + } + } + } } static NOINLINE threadcache *AllocCache(nedpool *p) THROWSPEC { - threadcache *tc=0; - int n, end; - ACQUIRE_LOCK(&p->mutex); - for (n=0; ncaches[n]; n++); - if (THREADCACHEMAXCACHES==n) - { /* List exhausted, so disable for this thread */ - RELEASE_LOCK(&p->mutex); - return 0; - } - tc=p->caches[n]=(threadcache *) mspace_calloc(p->m[0], 1, sizeof(threadcache)); - if (!tc) - { - RELEASE_LOCK(&p->mutex); - return 0; - } + threadcache *tc=0; + int n, end; + ACQUIRE_LOCK(&p->mutex); + for(n=0; ncaches[n]; n++); + if(THREADCACHEMAXCACHES==n) + { /* List exhausted, so disable for this thread */ + RELEASE_LOCK(&p->mutex); + return 0; + } + tc=p->caches[n]=(threadcache *) CallCalloc(p->m[0], 1, sizeof(threadcache), 0); + if(!tc) + { + RELEASE_LOCK(&p->mutex); + return 0; + } #ifdef FULLSANITYCHECKS - tc->magic1=*(unsigned int *)"NEDMALC1"; - tc->magic2=*(unsigned int *)"NEDMALC2"; + tc->magic1=*(unsigned int *)"NEDMALC1"; + tc->magic2=*(unsigned int *)"NEDMALC2"; #endif - tc->threadid=(long)(size_t)CURRENT_THREAD; - for (end=0; p->m[end]; end++); - tc->mymspace=abs(tc->threadid) % end; - RELEASE_LOCK(&p->mutex); - if (TLSSET(p->mycache, (void *)(size_t)(n+1))) abort(); - return tc; + tc->threadid=(long)(size_t)CURRENT_THREAD; + for(end=0; p->m[end]; end++); + tc->mymspace=abs(tc->threadid) % end; + RELEASE_LOCK(&p->mutex); + 
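  /* The TLS slot deliberately stores the cache index biased by +1: a raw value
     of 0 means "no cache selected yet", positive values are cache index + 1,
     and negative values (set by nedtrimthreadcache and FindMSpace) mean
     "bypass the cache and use mspace -value-1 directly". */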
if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort(); + return tc; } static void *threadcache_malloc(nedpool *p, threadcache *tc, size_t *size) THROWSPEC { - void *ret=0; - unsigned int bestsize; - unsigned int idx=size2binidx(*size); - size_t blksize=0; - threadcacheblk *blk, **binsptr; - UNREFERENCED_PARAMETER(p); + void *ret=0; + unsigned int bestsize; + unsigned int idx=size2binidx(*size); + size_t blksize=0; + threadcacheblk *blk, **binsptr; #ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); + tcfullsanitycheck(tc); #endif - /* Calculate best fit bin size */ - bestsize=1<<(idx+4); + /* Calculate best fit bin size */ + bestsize=1<<(idx+4); #if 0 - /* Finer grained bin fit */ - idx<<=1; - if (*size>bestsize) - { - idx++; - bestsize+=bestsize>>1; - } - if (*size>bestsize) - { - idx++; - bestsize=1<<(4+(idx>>1)); - } + /* Finer grained bin fit */ + idx<<=1; + if(*size>bestsize) + { + idx++; + bestsize+=bestsize>>1; + } + if(*size>bestsize) + { + idx++; + bestsize=1<<(4+(idx>>1)); + } #else - if (*size>bestsize) - { - idx++; - bestsize<<=1; - } + if(*size>bestsize) + { + idx++; + bestsize<<=1; + } #endif - assert(bestsize>=*size); - if (*sizebins[idx*2]; - /* Try to match close, but move up a bin if necessary */ - blk=*binsptr; - if (!blk || blk->size<*size) - { /* Bump it up a bin */ - if (idxsize; /*nedblksize(blk);*/ - assert(nedblksize(blk)>=blksize); - assert(blksize>=*size); - if (blk->next) - blk->next->prev=0; - *binsptr=blk->next; - if (!*binsptr) - binsptr[1]=0; + assert(bestsize>=*size); + if(*sizebins[idx*2]; + /* Try to match close, but move up a bin if necessary */ + blk=*binsptr; + if(!blk || blk->size<*size) + { /* Bump it up a bin */ + if(idxsize; /*nedblksize(blk);*/ + assert(nedblksize(blk)>=blksize); + assert(blksize>=*size); + if(blk->next) + blk->next->prev=0; + *binsptr=blk->next; + if(!*binsptr) + binsptr[1]=0; #ifdef FULLSANITYCHECKS - blk->magic=0; + blk->magic=0; #endif - assert(binsptr[0]!=blk && binsptr[1]!=blk); - assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD); - /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) size);*/ - ret=(void *) blk; - } - ++tc->mallocs; - if (ret) - { - assert(blksize>=*size); - ++tc->successes; - tc->freeInCache-=blksize; - assert((long) tc->freeInCache>=0); - } + assert(binsptr[0]!=blk && binsptr[1]!=blk); + assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD); + /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) size);*/ + ret=(void *) blk; + } + ++tc->mallocs; + if(ret) + { + assert(blksize>=*size); + ++tc->successes; + tc->freeInCache-=blksize; + assert((long) tc->freeInCache>=0); + } #if defined(DEBUG) && 0 - if (!(tc->mallocs & 0xfff)) - { - printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs, - (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache); - } + if(!(tc->mallocs & 0xfff)) + { + printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs, + (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache); + } #endif #ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); + tcfullsanitycheck(tc); #endif - return ret; + return ret; } static NOINLINE void ReleaseFreeInCache(nedpool *p, threadcache *tc, int mymspace) THROWSPEC { - unsigned int age=THREADCACHEMAXFREESPACE/8192; - 
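  /* With THREADCACHEMAXFREESPACE raised to 4Mb by this update, age starts at
     512 frees; each pass below evicts cached blocks not reused within 'age'
     frees and then halves the threshold until enough space has been handed
     back to the underlying allocator. */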
/*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/ - UNREFERENCED_PARAMETER(mymspace); - while (age && tc->freeInCache>=THREADCACHEMAXFREESPACE) - { - RemoveCacheEntries(p, tc, age); - /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/ - age>>=1; - } - /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/ + unsigned int age=THREADCACHEMAXFREESPACE/8192; + /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/ + while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE) + { + RemoveCacheEntries(p, tc, age); + /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/ + age>>=1; + } + /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/ } static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *mem, size_t size) THROWSPEC { - unsigned int bestsize; - unsigned int idx=size2binidx(size); - threadcacheblk **binsptr, *tck=(threadcacheblk *) mem; - assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD); + unsigned int bestsize; + unsigned int idx=size2binidx(size); + threadcacheblk **binsptr, *tck=(threadcacheblk *) mem; + assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD); #ifdef DEBUG - { /* Make sure this is a valid memory block */ - mchunkptr p = mem2chunk(mem); - mstate fm = get_mstate_for(p); - if (!ok_magic(fm)) - { - USAGE_ERROR_ACTION(fm, p); - return; - } - } + /* Make sure this is a valid memory block */ + assert(nedblksize(mem)); #endif #ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); + tcfullsanitycheck(tc); #endif - /* Calculate best fit bin size */ - bestsize=1<<(idx+4); + /* Calculate best fit bin size */ + bestsize=1<<(idx+4); #if 0 - /* Finer grained bin fit */ - idx<<=1; - if (size>bestsize) - { - unsigned int biggerbestsize=bestsize+bestsize<<1; - if (size>=biggerbestsize) - { - idx++; - bestsize=biggerbestsize; - } - } + /* Finer grained bin fit */ + idx<<=1; + if(size>bestsize) + { + unsigned int biggerbestsize=bestsize+bestsize<<1; + if(size>=biggerbestsize) + { + idx++; + bestsize=biggerbestsize; + } + } #endif - if (bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */ - size=bestsize; - binsptr=&tc->bins[idx*2]; - assert(idx<=THREADCACHEMAXBINS); - if (tck==*binsptr) - { - fprintf(stderr, "Attempt to free already freed memory block %p - aborting!\n", tck); - abort(); - } + if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */ + size=bestsize; + binsptr=&tc->bins[idx*2]; + assert(idx<=THREADCACHEMAXBINS); + if(tck==*binsptr) + { + fprintf(stderr, "nedmalloc: Attempt to free already freed memory block %p - aborting!\n", tck); + abort(); + } #ifdef FULLSANITYCHECKS - tck->magic=*(unsigned int *) "NEDN"; + tck->magic=*(unsigned int *) "NEDN"; #endif - tck->lastUsed=++tc->frees; - tck->size=(unsigned int) size; - tck->next=*binsptr; - tck->prev=0; - if (tck->next) - tck->next->prev=tck; - else - binsptr[1]=tck; - assert(!*binsptr || (*binsptr)->size==tck->size); - *binsptr=tck; - assert(tck==tc->bins[idx*2]); - assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck); - /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/ - tc->freeInCache+=size; + tck->lastUsed=++tc->frees; + tck->size=(unsigned int) size; + tck->next=*binsptr; + tck->prev=0; + if(tck->next) + tck->next->prev=tck; + else + binsptr[1]=tck; + assert(!*binsptr || (*binsptr)->size==tck->size); + *binsptr=tck; + assert(tck==tc->bins[idx*2]); + assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck); + /*printf("free: %p, %p, 
%p, %lu\n", p, tc, mem, (long) size);*/ + tc->freeInCache+=size; #ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); + tcfullsanitycheck(tc); #endif #if 1 - if (tc->freeInCache>=THREADCACHEMAXFREESPACE) - ReleaseFreeInCache(p, tc, mymspace); + if(tc->freeInCache>=THREADCACHEMAXFREESPACE) + ReleaseFreeInCache(p, tc, mymspace); #endif } @@ -577,440 +712,502 @@ static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *me static NOINLINE int InitPool(nedpool *p, size_t capacity, int threads) THROWSPEC { /* threads is -1 for system pool */ - ensure_initialization(); - ACQUIRE_MALLOC_GLOBAL_LOCK(); - if (p->threads) goto done; - if (INITIAL_LOCK(&p->mutex)) goto err; - if (TLSALLOC(&p->mycache)) goto err; - if (!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err; - p->m[0]->extp=p; - p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads; + ensure_initialization(); + ACQUIRE_MALLOC_GLOBAL_LOCK(); + if(p->threads) goto done; + if(INITIAL_LOCK(&p->mutex)) goto err; + if(TLSALLOC(&p->mycache)) goto err; +#if USE_ALLOCATOR==0 + p->m[0]=(mstate) mspacecounter++; +#elif USE_ALLOCATOR==1 + if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err; + p->m[0]->extp=p; +#endif + p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads; done: - RELEASE_MALLOC_GLOBAL_LOCK(); - return 1; + RELEASE_MALLOC_GLOBAL_LOCK(); + return 1; err: - if (threads<0) - abort(); /* If you can't allocate for system pool, we're screwed */ - DestroyCaches(p); - if (p->m[0]) - { - destroy_mspace(p->m[0]); - p->m[0]=0; - } - if (p->mycache) - { - if (TLSFREE(p->mycache)) abort(); - p->mycache=0; - } - RELEASE_MALLOC_GLOBAL_LOCK(); - return 0; + if(threads<0) + abort(); /* If you can't allocate for system pool, we're screwed */ + DestroyCaches(p); + if(p->m[0]) + { +#if USE_ALLOCATOR==1 + destroy_mspace(p->m[0]); +#endif + p->m[0]=0; + } + if(p->mycache) + { + if(TLSFREE(p->mycache)) abort(); + p->mycache=0; + } + RELEASE_MALLOC_GLOBAL_LOCK(); + return 0; } static NOINLINE mstate FindMSpace(nedpool *p, threadcache *tc, int *lastUsed, size_t size) THROWSPEC { /* Gets called when thread's last used mspace is in use. 
The strategy is to run through the list of all available mspaces looking for an unlocked one and if we fail, we create a new one so long as we don't exceed p->threads */ - int n, end; - for (n=end=*lastUsed+1; p->m[n]; end=++n) -{ -if (TRY_LOCK(&p->m[n]->mutex)) goto found; -} -for (n=0; n<*lastUsed && p->m[n]; n++) -{ -if (TRY_LOCK(&p->m[n]->mutex)) goto found; -} -if (endthreads) -{ -mstate temp; -if (!(temp=(mstate) create_mspace(size, 1))) - goto badexit; - /* Now we're ready to modify the lists, we lock */ - ACQUIRE_LOCK(&p->mutex); - while (p->m[end] && endthreads) - end++; - if (end>=p->threads) - { /* Drat, must destroy it now */ - RELEASE_LOCK(&p->mutex); - destroy_mspace((mspace) temp); - goto badexit; - } - /* We really want to make sure this goes into memory now but we - have to be careful of breaking aliasing rules, so write it twice */ - *((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp; - ACQUIRE_LOCK(&p->m[end]->mutex); - /*printf("Created mspace idx %d\n", end);*/ - RELEASE_LOCK(&p->mutex); - n=end; - goto found; -} -/* Let it lock on the last one it used */ + int n, end; + for(n=end=*lastUsed+1; p->m[n]; end=++n) + { + if(TRY_LOCK(&p->m[n]->mutex)) goto found; + } + for(n=0; n<*lastUsed && p->m[n]; n++) + { + if(TRY_LOCK(&p->m[n]->mutex)) goto found; + } + if(endthreads) + { + mstate temp; +#if USE_ALLOCATOR==0 + temp=(mstate) mspacecounter++; +#elif USE_ALLOCATOR==1 + if(!(temp=(mstate) create_mspace(size, 1))) + goto badexit; +#endif + /* Now we're ready to modify the lists, we lock */ + ACQUIRE_LOCK(&p->mutex); + while(p->m[end] && endthreads) + end++; + if(end>=p->threads) + { /* Drat, must destroy it now */ + RELEASE_LOCK(&p->mutex); +#if USE_ALLOCATOR==1 + destroy_mspace((mstate) temp); +#endif + goto badexit; + } + /* We really want to make sure this goes into memory now but we + have to be careful of breaking aliasing rules, so write it twice */ + *((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp; + ACQUIRE_LOCK(&p->m[end]->mutex); + /*printf("Created mspace idx %d\n", end);*/ + RELEASE_LOCK(&p->mutex); + n=end; + goto found; + } + /* Let it lock on the last one it used */ badexit: -ACQUIRE_LOCK(&p->m[*lastUsed]->mutex); -return p->m[*lastUsed]; + ACQUIRE_LOCK(&p->m[*lastUsed]->mutex); + return p->m[*lastUsed]; found: -*lastUsed=n; -if (tc) -tc->mymspace=n; -else -{ - if (TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort(); - } -return p->m[n]; + *lastUsed=n; + if(tc) + tc->mymspace=n; + else + { + if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort(); + } + return p->m[n]; } NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC { - nedpool *ret; - if (!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0; - if (!InitPool(ret, capacity, threads)) - { - nedpfree(0, ret); - return 0; - } - return ret; + nedpool *ret; + if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0; + if(!InitPool(ret, capacity, threads)) + { + nedpfree(0, ret); + return 0; + } + return ret; } void neddestroypool(nedpool *p) THROWSPEC { - int n; - ACQUIRE_LOCK(&p->mutex); - DestroyCaches(p); - for (n=0; p->m[n]; n++) - { - destroy_mspace(p->m[n]); - p->m[n]=0; - } - RELEASE_LOCK(&p->mutex); - if (TLSFREE(p->mycache)) abort(); - nedpfree(0, p); + int n; + ACQUIRE_LOCK(&p->mutex); + DestroyCaches(p); + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 + destroy_mspace(p->m[n]); +#endif + p->m[n]=0; + } + RELEASE_LOCK(&p->mutex); + if(TLSFREE(p->mycache)) abort(); + nedpfree(0, p); } -void neddestroysyspool(void) 
THROWSPEC +void neddestroysyspool() THROWSPEC { - nedpool *p=&syspool; - int n; - ACQUIRE_LOCK(&p->mutex); - DestroyCaches(p); - for (n=0; p->m[n]; n++) - { - destroy_mspace(p->m[n]); - p->m[n]=0; - } - /* Render syspool unusable */ - for (n=0; ncaches[n]=(threadcache *) 0xdeadbeef; - for (n=0; nm[n]=(mstate) 0xdeadbeef; - if (TLSFREE(p->mycache)) abort(); - RELEASE_LOCK(&p->mutex); + nedpool *p=&syspool; + int n; + ACQUIRE_LOCK(&p->mutex); + DestroyCaches(p); + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 + destroy_mspace(p->m[n]); +#endif + p->m[n]=0; + } + /* Render syspool unusable */ + for(n=0; ncaches[n]=(threadcache *)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeef : 0xdeadbeef); + for(n=0; nm[n]=(mstate)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeef : 0xdeadbeef); + if(TLSFREE(p->mycache)) abort(); + RELEASE_LOCK(&p->mutex); } void nedpsetvalue(nedpool *p, void *v) THROWSPEC { - if (!p) { p=&syspool; if (!syspool.threads) InitPool(&syspool, 0, -1); } - p->uservalue=v; + if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } + p->uservalue=v; } void *nedgetvalue(nedpool **p, void *mem) THROWSPEC { - nedpool *np=0; - mchunkptr mcp=mem2chunk(mem); - mstate fm; - if (!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0; - if (!cinuse(mcp)) return 0; - if (!next_pinuse(mcp)) return 0; - if (!is_mmapped(mcp) && !pinuse(mcp)) - { - if (next_chunk(prev_chunk(mcp))!=mcp) return 0; - } - fm=get_mstate_for(mcp); - if (!ok_magic(fm)) return 0; - if (!ok_address(fm, mcp)) return 0; - if (!fm->extp) return 0; - np=(nedpool *) fm->extp; - if (p) *p=np; - return np->uservalue; + nedpool *np=0; + mchunkptr mcp=mem2chunk(mem); + mstate fm; + if(!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0; + if(!cinuse(mcp)) return 0; + if(!next_pinuse(mcp)) return 0; + if(!is_mmapped(mcp) && !pinuse(mcp)) + { + if(next_chunk(prev_chunk(mcp))!=mcp) return 0; + } + fm=get_mstate_for(mcp); + if(!ok_magic(fm)) return 0; + if(!ok_address(fm, mcp)) return 0; + if(!fm->extp) return 0; + np=(nedpool *) fm->extp; + if(p) *p=np; + return np->uservalue; } void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC { - int mycache; - if (!p) - { - p=&syspool; - if (!syspool.threads) InitPool(&syspool, 0, -1); - } - mycache=(int)(size_t) TLSGET(p->mycache); - if (!mycache) - { /* Set to mspace 0 */ - if (disable && TLSSET(p->mycache, (void *)-1)) abort(); - } - else if (mycache>0) - { /* Set to last used mspace */ - threadcache *tc=p->caches[mycache-1]; + int mycache; + if(!p) + { + p=&syspool; + if(!syspool.threads) InitPool(&syspool, 0, -1); + } + mycache=(int)(size_t) TLSGET(p->mycache); + if(!mycache) + { /* Set to mspace 0 */ + if(disable && TLSSET(p->mycache, (void *)(size_t)-1)) abort(); + } + else if(mycache>0) + { /* Set to last used mspace */ + threadcache *tc=p->caches[mycache-1]; #if defined(DEBUG) - printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n", - 100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs); + printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n", + 100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs); #endif - if (disable && TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort(); - tc->frees++; - RemoveCacheEntries(p, tc, 0); - assert(!tc->freeInCache); - if (disable) - { - tc->mymspace=-1; - tc->threadid=0; - mspace_free(0, p->caches[mycache-1]); - p->caches[mycache-1]=0; - } - } + if(disable && 
TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort(); + tc->frees++; + RemoveCacheEntries(p, tc, 0); + assert(!tc->freeInCache); + if(disable) + { + tc->mymspace=-1; + tc->threadid=0; + CallFree(0, p->caches[mycache-1]); + p->caches[mycache-1]=0; + } + } } void neddisablethreadcache(nedpool *p) THROWSPEC { - nedtrimthreadcache(p, 1); + nedtrimthreadcache(p, 1); } -#define GETMSPACE(m,p,tc,ms,s,action) \ - do \ - { \ - mstate m = GetMSpace((p),(tc),(ms),(s)); \ - action; \ - RELEASE_LOCK(&m->mutex); \ +#define GETMSPACE(m,p,tc,ms,s,action) \ + do \ + { \ + mstate m = GetMSpace((p),(tc),(ms),(s)); \ + action; \ + if(USE_ALLOCATOR==1) { RELEASE_LOCK(&m->mutex); } \ } while (0) static FORCEINLINE mstate GetMSpace(nedpool *p, threadcache *tc, int mymspace, size_t size) THROWSPEC { /* Returns a locked and ready for use mspace */ - mstate m=p->m[mymspace]; - assert(m); - if (!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size); \ - /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/ - return m; + mstate m=p->m[mymspace]; + assert(m); +#if USE_ALLOCATOR==1 + if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size); + /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/ +#endif + return m; } static FORCEINLINE void GetThreadCache(nedpool **p, threadcache **tc, int *mymspace, size_t *size) THROWSPEC { - int mycache; - if (size && *sizemycache); - if (mycache>0) - { /* Already have a cache */ - *tc=(*p)->caches[mycache-1]; - *mymspace=(*tc)->mymspace; - } - else if (!mycache) - { /* Need to allocate a new cache */ - *tc=AllocCache(*p); - if (!*tc) - { /* Disable */ - if (TLSSET((*p)->mycache, (void *)-1)) abort(); - *mymspace=0; - } - else - *mymspace=(*tc)->mymspace; - } - else - { /* Cache disabled, but we do have an assigned thread pool */ - *tc=0; - *mymspace=-mycache-1; - } - assert(*mymspace>=0); - assert(!(*tc) || (long)(size_t)CURRENT_THREAD==(*tc)->threadid); + int mycache; + if(size && *sizemycache); + if(mycache>0) + { /* Already have a cache */ + *tc=(*p)->caches[mycache-1]; + *mymspace=(*tc)->mymspace; + } + else if(!mycache) + { /* Need to allocate a new cache */ + *tc=AllocCache(*p); + if(!*tc) + { /* Disable */ + if(TLSSET((*p)->mycache, (void *)(size_t)-1)) abort(); + *mymspace=0; + } + else + *mymspace=(*tc)->mymspace; + } + else + { /* Cache disabled, but we do have an assigned thread pool */ + *tc=0; + *mymspace=-mycache-1; + } + assert(*mymspace>=0); + assert(!(*tc) || (long)(size_t)CURRENT_THREAD==(*tc)->threadid); #ifdef FULLSANITYCHECKS - if (*tc) - { - if (*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2) - { - abort(); - } - } + if(*tc) + { + if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2) + { + abort(); + } + } #endif } NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC { - void *ret=0; - threadcache *tc; - int mymspace; - GetThreadCache(&p, &tc, &mymspace, &size); + void *ret=0; + threadcache *tc; + int mymspace; + GetThreadCache(&p, &tc, &mymspace, &size); #if THREADCACHEMAX - if (tc && size<=THREADCACHEMAX) - { /* Use the thread cache */ - ret=threadcache_malloc(p, tc, &size); - } + if(tc && size<=THREADCACHEMAX) + { /* Use the thread cache */ + ret=threadcache_malloc(p, tc, &size); + } #endif - if (!ret) - { /* Use this thread's mspace */ + if(!ret) + { /* Use this thread's mspace */ GETMSPACE(m, p, tc, mymspace, size, - ret=mspace_malloc(m, size)); - } - return ret; + ret=CallMalloc(m, size, 0)); + } + return ret; } NEDMALLOCPTRATTR 
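/* A minimal usage sketch of the pool API exercised above (illustration only,
   not part of the upstream patch): create a private pool, allocate and free
   from it, then destroy it.  The function name example_pool_use is made up;
   the calls themselves are the nedcreatepool/nedpmalloc/nedpfree/
   neddestroypool entry points declared in nedmalloc.h. */
#if 0
static void example_pool_use(void)
{
  nedpool *pool=nedcreatepool(1024*1024, 4); /* ~1Mb initial capacity, at most 4 mspaces */
  if(pool)
  {
    void *mem=nedpmalloc(pool, 256);  /* may be served from the thread cache when <=THREADCACHEMAX */
    if(mem) nedpfree(pool, mem);
    neddestroypool(pool);             /* releases the pool's mspaces and thread caches */
  }
}
#endif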
void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC { - size_t rsize=size*no; - void *ret=0; - threadcache *tc; - int mymspace; - GetThreadCache(&p, &tc, &mymspace, &rsize); + size_t rsize=size*no; + void *ret=0; + threadcache *tc; + int mymspace; + GetThreadCache(&p, &tc, &mymspace, &rsize); #if THREADCACHEMAX - if (tc && rsize<=THREADCACHEMAX) - { /* Use the thread cache */ - if ((ret=threadcache_malloc(p, tc, &rsize))) - memset(ret, 0, rsize); - } + if(tc && rsize<=THREADCACHEMAX) + { /* Use the thread cache */ + if((ret=threadcache_malloc(p, tc, &rsize))) + memset(ret, 0, rsize); + } #endif - if (!ret) - { /* Use this thread's mspace */ + if(!ret) + { /* Use this thread's mspace */ GETMSPACE(m, p, tc, mymspace, rsize, - ret=mspace_calloc(m, 1, rsize)); - } - return ret; + ret=CallCalloc(m, 1, rsize, 0)); + } + return ret; } NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC { - void *ret=0; - threadcache *tc; - int mymspace; - if (!mem) return nedpmalloc(p, size); - GetThreadCache(&p, &tc, &mymspace, &size); + void *ret=0; + threadcache *tc; + int mymspace; + if(!mem) return nedpmalloc(p, size); + GetThreadCache(&p, &tc, &mymspace, &size); #if THREADCACHEMAX - if (tc && size && size<=THREADCACHEMAX) - { /* Use the thread cache */ - size_t memsize=nedblksize(mem); - assert(memsize); - if ((ret=threadcache_malloc(p, tc, &size))) - { - memcpy(ret, mem, memsizem[n]; n++) -{ -struct mallinfo t=mspace_mallinfo(p->m[n]); - ret.arena+=t.arena; - ret.ordblks+=t.ordblks; - ret.hblkhd+=t.hblkhd; - ret.usmblks+=t.usmblks; - ret.uordblks+=t.uordblks; - ret.fordblks+=t.fordblks; - ret.keepcost+=t.keepcost; -} -return ret; + int n; + struct mallinfo ret={0}; + if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 + struct mallinfo t=mspace_mallinfo(p->m[n]); + ret.arena+=t.arena; + ret.ordblks+=t.ordblks; + ret.hblkhd+=t.hblkhd; + ret.usmblks+=t.usmblks; + ret.uordblks+=t.uordblks; + ret.fordblks+=t.fordblks; + ret.keepcost+=t.keepcost; +#endif + } + return ret; } #endif int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC { - UNREFERENCED_PARAMETER(p); - return mspace_mallopt(parno, value); +#if USE_ALLOCATOR==1 + return mspace_mallopt(parno, value); +#else + return 0; +#endif +} +void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC +{ +#if USE_ALLOCATOR==1 + if(granularity) *granularity=mparams.granularity; + if(magic) *magic=mparams.magic; + return (void *) &syspool; +#else + if(granularity) *granularity=0; + if(magic) *magic=0; + return 0; +#endif } int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC { - int n, ret=0; - if (!p) { p=&syspool; if (!syspool.threads) InitPool(&syspool, 0, -1); } - for (n=0; p->m[n]; n++) - { - ret+=mspace_trim(p->m[n], pad); - } - return ret; + int n, ret=0; + if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 + ret+=mspace_trim(p->m[n], pad); +#endif + } + return ret; } void nedpmalloc_stats(nedpool *p) THROWSPEC { - int n; - if (!p) { p=&syspool; if (!syspool.threads) InitPool(&syspool, 0, -1); } - for (n=0; p->m[n]; n++) - { - mspace_malloc_stats(p->m[n]); - } + int n; + if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 + mspace_malloc_stats(p->m[n]); +#endif + } } size_t nedpmalloc_footprint(nedpool *p) THROWSPEC { - size_t ret=0; - int n; - if (!p) { p=&syspool; if (!syspool.threads) 
InitPool(&syspool, 0, -1); } - for (n=0; p->m[n]; n++) - { - ret+=mspace_footprint(p->m[n]); - } - return ret; + size_t ret=0; + int n; + if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 + ret+=mspace_footprint(p->m[n]); +#endif + } + return ret; } NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { - void **ret; - threadcache *tc; - int mymspace; - GetThreadCache(&p, &tc, &mymspace, &elemsize); + void **ret; + threadcache *tc; + int mymspace; + GetThreadCache(&p, &tc, &mymspace, &elemsize); +#if USE_ALLOCATOR==0 + GETMSPACE(m, p, tc, mymspace, elemsno*elemsize, + ret=unsupported_operation("independent_calloc")); +#elif USE_ALLOCATOR==1 GETMSPACE(m, p, tc, mymspace, elemsno*elemsize, ret=mspace_independent_calloc(m, elemsno, elemsize, chunks)); - return ret; +#endif + return ret; } NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC { - void **ret; - threadcache *tc; - int mymspace; + void **ret; + threadcache *tc; + int mymspace; size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t)); - if (!adjustedsizes) return 0; - for (i=0; i