Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.

===================================================================
RCS file: /ftp/cvs/cvsroot/src/lib/libc/stdlib/jemalloc.c,v
rcsdiff: /ftp/cvs/cvsroot/src/lib/libc/stdlib/jemalloc.c,v: warning: Unknown phrases like `commitid ...;' are present.
retrieving revision 1.21.2.1
retrieving revision 1.44.4.1
diff -u -p -r1.21.2.1 -r1.44.4.1
--- src/lib/libc/stdlib/jemalloc.c	2011/03/05 15:09:20	1.21.2.1
+++ src/lib/libc/stdlib/jemalloc.c	2019/06/10 22:05:21	1.44.4.1
@@ -1,4 +1,4 @@
-/* $NetBSD: jemalloc.c,v 1.21.2.1 2011/03/05 15:09:20 bouyer Exp $ */
+/* $NetBSD: jemalloc.c,v 1.44.4.1 2019/06/10 22:05:21 christos Exp $ */
 
 /*-
  * Copyright (C) 2006,2007 Jason Evans .
@@ -118,7 +118,7 @@
 
 #include 
 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
-__RCSID("$NetBSD: jemalloc.c,v 1.21.2.1 2011/03/05 15:09:20 bouyer Exp $");
+__RCSID("$NetBSD: jemalloc.c,v 1.44.4.1 2019/06/10 22:05:21 christos Exp $");
 
 #ifdef __FreeBSD__
 #include "libc_private.h"
@@ -143,8 +143,8 @@ __RCSID("$NetBSD: jemalloc.c,v 1.21.2.1
 #ifdef __FreeBSD__
 #include 
 #include 
-#endif
 #include 
+#endif
 
 #include 
 #include 
@@ -163,27 +163,7 @@ __RCSID("$NetBSD: jemalloc.c,v 1.21.2.1
 # include 
 # include "extern.h"
 
-#define STRERROR_R(a, b, c) __strerror_r(a, b, c);
-/*
- * A non localized version of strerror, that avoids bringing in
- * stdio and the locale code. All the malloc messages are in English
- * so why bother?
- */
-static int
-__strerror_r(int e, char *s, size_t l)
-{
-        int rval;
-        size_t slen;
-
-        if (e >= 0 && e < sys_nerr) {
-                slen = strlcpy(s, sys_errlist[e], l);
-                rval = 0;
-        } else {
-                slen = snprintf_ss(s, l, "Unknown error %u", e);
-                rval = EINVAL;
-        }
-        return slen >= l ? ERANGE : rval;
-}
+#define STRERROR_R(a, b, c) strerror_r_ss(a, b, c);
 #endif
 
 #ifdef __FreeBSD__
@@ -216,6 +196,14 @@ __strerror_r(int e, char *s, size_t l)
 #define STRERROR_BUF 64
 
 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
+
+/*
+ * If you touch the TINY_MIN_2POW definition for any architecture, please
+ * make sure to adjust the corresponding definition for JEMALLOC_TINY_MIN_2POW
+ * in the gcc 4.8 tree in dist/gcc/tree-ssa-ccp.c and verify that a native
+ * gcc is still buildable!
+ */
+
 #ifdef __i386__
 # define QUANTUM_2POW_MIN 4
 # define SIZEOF_PTR_2POW 2
@@ -225,36 +213,53 @@ __strerror_r(int e, char *s, size_t l)
 # define QUANTUM_2POW_MIN 4
 # define SIZEOF_PTR_2POW 3
 #endif
+#ifdef __aarch64__
+# define QUANTUM_2POW_MIN 4
+# define SIZEOF_PTR_2POW 3
+# define NO_TLS
+#endif
 #ifdef __alpha__
 # define QUANTUM_2POW_MIN 4
 # define SIZEOF_PTR_2POW 3
+# define TINY_MIN_2POW 3
 # define NO_TLS
 #endif
 #ifdef __sparc64__
 # define QUANTUM_2POW_MIN 4
 # define SIZEOF_PTR_2POW 3
+# define TINY_MIN_2POW 3
 # define NO_TLS
 #endif
 #ifdef __amd64__
 # define QUANTUM_2POW_MIN 4
 # define SIZEOF_PTR_2POW 3
+# define TINY_MIN_2POW 3
 #endif
 #ifdef __arm__
 # define QUANTUM_2POW_MIN 3
 # define SIZEOF_PTR_2POW 2
 # define USE_BRK
+# ifdef __ARM_EABI__
+#  define TINY_MIN_2POW 3
+# endif
 # define NO_TLS
 #endif
 #ifdef __powerpc__
 # define QUANTUM_2POW_MIN 4
 # define SIZEOF_PTR_2POW 2
 # define USE_BRK
+# define TINY_MIN_2POW 3
 #endif
 #if defined(__sparc__) && !defined(__sparc64__)
 # define QUANTUM_2POW_MIN 4
 # define SIZEOF_PTR_2POW 2
 # define USE_BRK
 #endif
+#ifdef __or1k__
+# define QUANTUM_2POW_MIN 4
+# define SIZEOF_PTR_2POW 2
+# define USE_BRK
+#endif
 #ifdef __vax__
 # define QUANTUM_2POW_MIN 4
 # define SIZEOF_PTR_2POW 2
@@ -270,14 +275,20 @@ __strerror_r(int e, char *s, size_t l)
 # define SIZEOF_PTR_2POW 2
 # define USE_BRK
 #endif
-#ifdef __mips__
+#if defined(__mips__) || defined(__riscv__)
+# ifdef _LP64
+#  define SIZEOF_PTR_2POW 3
+#  define TINY_MIN_2POW 3
+# else
+#  define SIZEOF_PTR_2POW 2
+# endif
 # define QUANTUM_2POW_MIN 4
-# define SIZEOF_PTR_2POW 2
 # define USE_BRK
 #endif
 #ifdef __hppa__
-# define QUANTUM_2POW_MIN 4
-# define SIZEOF_PTR_2POW 2
+# define QUANTUM_2POW_MIN 4
+# define TINY_MIN_2POW 4
+# define SIZEOF_PTR_2POW 2
 # define USE_BRK
 #endif
 
@@ -288,11 +299,6 @@ __strerror_r(int e, char *s, size_t l)
 # define SIZEOF_INT_2POW 2
 #endif
 
-/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
-#if (!defined(PIC) && !defined(NO_TLS))
-# define NO_TLS
-#endif
-
 /*
  * Size and alignment of memory chunks that are allocated by the OS's virtual
  * memory system.
 */
@@ -308,7 +314,9 @@ __strerror_r(int e, char *s, size_t l)
 #define CACHELINE ((size_t)(1 << CACHELINE_2POW))
 
 /* Smallest size class to support. */
-#define TINY_MIN_2POW 1
+#ifndef TINY_MIN_2POW
+#define TINY_MIN_2POW 2
+#endif
 
 /*
  * Maximum size class that is a multiple of the quantum, but not (necessarily)
@@ -365,9 +373,11 @@ static malloc_mutex_t init_lock = {_SPIN
 /* Set to true once the allocator has been initialized. */
 static bool malloc_initialized = false;
 
+#ifdef _REENTRANT
 /* Used to avoid initialization races. */
 static mutex_t init_lock = MUTEX_INITIALIZER;
 #endif
+#endif
 
 /******************************************************************************/
 /*
@@ -677,8 +687,10 @@ static size_t arena_maxclass; /* Max si
  * Chunks.
 */
 
+#ifdef _REENTRANT
 /* Protects chunk-related data structures. */
 static malloc_mutex_t chunks_mtx;
+#endif
 
 /* Tree of chunks that are stand-alone huge allocations. */
 static chunk_tree_t huge;
@@ -692,7 +704,9 @@ static chunk_tree_t huge;
  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
  * could cause recursive lock acquisition).
 */
+#ifdef _REENTRANT
 static malloc_mutex_t brk_mtx;
+#endif
 /* Result of first sbrk(0) call. */
 static void *brk_base;
 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
@@ -729,7 +743,9 @@ static void *base_pages;
 static void *base_next_addr;
 static void *base_past_addr; /* Addr immediately past base_pages. */
 static chunk_node_t *base_chunk_nodes; /* LIFO cache of chunk nodes. */
+#ifdef _REENTRANT
 static malloc_mutex_t base_mtx;
+#endif
 #ifdef MALLOC_STATS
 static size_t base_mapped;
 #endif
@@ -746,20 +762,62 @@ static size_t base_mapped;
 static arena_t **arenas;
 static unsigned narenas;
 static unsigned next_arena;
+#ifdef _REENTRANT
 static malloc_mutex_t arenas_mtx; /* Protects arenas initialization. */
+#endif
 
-#ifndef NO_TLS
 /*
  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
  * for allocations.
 */
-static __thread arena_t *arenas_map;
-#define get_arenas_map() (arenas_map)
-#define set_arenas_map(x) (arenas_map = x)
+#ifndef NO_TLS
+static __thread arena_t **arenas_map;
+#else
+static arena_t **arenas_map;
+#endif
+
+#if !defined(NO_TLS) || !defined(_REENTRANT)
+# define get_arenas_map() (arenas_map)
+# define set_arenas_map(x) (arenas_map = x)
 #else
-static thread_key_t arenas_map_key;
-#define get_arenas_map() thr_getspecific(arenas_map_key)
-#define set_arenas_map(x) thr_setspecific(arenas_map_key, x)
+
+static thread_key_t arenas_map_key = -1;
+
+static inline arena_t **
+get_arenas_map(void)
+{
+        if (!__isthreaded)
+                return arenas_map;
+
+        if (arenas_map_key == -1) {
+                (void)thr_keycreate(&arenas_map_key, NULL);
+                if (arenas_map != NULL) {
+                        thr_setspecific(arenas_map_key, arenas_map);
+                        arenas_map = NULL;
+                }
+        }
+
+        return thr_getspecific(arenas_map_key);
+}
+
+static __inline void
+set_arenas_map(arena_t **a)
+{
+        if (!__isthreaded) {
+                arenas_map = a;
+                return;
+        }
+
+        if (arenas_map_key == -1) {
+                (void)thr_keycreate(&arenas_map_key, NULL);
+                if (arenas_map != NULL) {
+                        _DIAGASSERT(arenas_map == a);
+                        arenas_map = NULL;
+                }
+        }
+
+        thr_setspecific(arenas_map_key, a);
+}
 #endif
 
 #ifdef MALLOC_STATS
@@ -816,7 +874,7 @@ static void wrtmessage(const char *p1, c
 #ifdef MALLOC_STATS
 static void malloc_printf(const char *format, ...);
 #endif
-static char *umax2s(uintmax_t x, char *s);
+static char *size_t2s(size_t x, char *s);
 static bool base_pages_alloc(size_t minsize);
 static void *base_alloc(size_t size);
 static chunk_node_t *base_chunk_node_alloc(void);
@@ -978,19 +1036,19 @@ malloc_printf(const char *format, ...)
 
 /*
  * We don't want to depend on vsnprintf() for production builds, since that can
- * cause unnecessary bloat for static binaries. umax2s() provides minimal
+ * cause unnecessary bloat for static binaries. size_t2s() provides minimal
  * integer printing functionality, so that malloc_printf() use can be limited to
  * MALLOC_STATS code.
 */
 #define UMAX2S_BUFSIZE 21
 static char *
-umax2s(uintmax_t x, char *s)
+size_t2s(size_t x, char *s)
 {
        unsigned i;
 
        /* Make sure UMAX2S_BUFSIZE is large enough. */
        /* LINTED */
-       assert(sizeof(uintmax_t) <= 8);
+       assert(sizeof(size_t) <= 8);
        i = UMAX2S_BUFSIZE - 1;
        s[i] = '\0';
@@ -1586,6 +1644,11 @@ arena_chunk_comp(arena_chunk_t *a, arena
        assert(a != NULL);
        assert(b != NULL);
 
+       if (a->max_frun_npages < b->max_frun_npages)
+               return -1;
+       if (a->max_frun_npages > b->max_frun_npages)
+               return 1;
+
        if ((uintptr_t)a < (uintptr_t)b)
                return (-1);
        else if (a == b)
@@ -1643,7 +1706,7 @@ arena_run_reg_alloc(arena_run_t *run, ar
            + (bin->reg_size * regind));
 
                /* Clear bit. */
-               mask ^= (1 << bit);
+               mask ^= (1U << bit);
                run->regs_mask[i] = mask;
 
                return (ret);
@@ -1660,7 +1723,7 @@ arena_run_reg_alloc(arena_run_t *run, ar
            + (bin->reg_size * regind));
 
                        /* Clear bit. */
-                       mask ^= (1 << bit);
+                       mask ^= (1U << bit);
                        run->regs_mask[i] = mask;
 
                        /*
@@ -1775,8 +1838,8 @@ arena_run_reg_dalloc(arena_run_t *run, a
        if (elm < run->regs_minelm)
                run->regs_minelm = elm;
        bit = regind - (elm << (SIZEOF_INT_2POW + 3));
-       assert((run->regs_mask[elm] & (1 << bit)) == 0);
-       run->regs_mask[elm] |= (1 << bit);
+       assert((run->regs_mask[elm] & (1U << bit)) == 0);
+       run->regs_mask[elm] |= (1U << bit);
 #undef SIZE_INV
 #undef SIZE_INV_SHIFT
 }
@@ -1837,9 +1900,6 @@ arena_chunk_alloc(arena_t *arena)
 
                chunk->arena = arena;
 
-               /* LINTED */
-               RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
-
                /*
                 * Claim that no pages are in use, since the header is merely
                 * overhead.
@@ -1859,6 +1919,8 @@ arena_chunk_alloc(arena_t *arena)
                chunk->map[chunk_npages - 1].npages = chunk_npages -
                    arena_chunk_header_npages;
                chunk->map[chunk_npages - 1].pos = POS_FREE;
+
+               RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
        }
 
        return (chunk);
@@ -1895,30 +1957,44 @@ arena_chunk_dealloc(arena_t *arena, aren
 static arena_run_t *
 arena_run_alloc(arena_t *arena, size_t size)
 {
-       arena_chunk_t *chunk;
+       arena_chunk_t *chunk, *chunk_tmp;
        arena_run_t *run;
-       unsigned need_npages, limit_pages, compl_need_npages;
+       unsigned need_npages;
 
        assert(size <= (chunksize - (arena_chunk_header_npages <<
            pagesize_2pow)));
        assert((size & pagesize_mask) == 0);
 
        /*
-        * Search through arena's chunks in address order for a free run that is
-        * large enough. Look for the first fit.
+        * Search through the arena chunk tree for a large enough free run.
+        * Tree order ensures that any exact fit is picked immediately or
+        * otherwise the lowest address of the next size.
        */
        need_npages = (unsigned)(size >> pagesize_2pow);
-       limit_pages = chunk_npages - arena_chunk_header_npages;
-       compl_need_npages = limit_pages - need_npages;
        /* LINTED */
-       RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
+       for (;;) {
+               chunk_tmp = RB_ROOT(&arena->chunks);
+               chunk = NULL;
+               while (chunk_tmp) {
+                       if (chunk_tmp->max_frun_npages == need_npages) {
+                               chunk = chunk_tmp;
+                               break;
+                       }
+                       if (chunk_tmp->max_frun_npages < need_npages) {
+                               chunk_tmp = RB_RIGHT(chunk_tmp, link);
+                               continue;
+                       }
+                       chunk = chunk_tmp;
+                       chunk_tmp = RB_LEFT(chunk, link);
+               }
+               if (chunk == NULL)
+                       break;
                /*
-                * Avoid searching this chunk if there are not enough
-                * contiguous free pages for there to possibly be a large
-                * enough free run.
+                * At this point, the chunk must have a cached run size large
+                * enough to fit the allocation.
                */
-               if (chunk->pages_used <= compl_need_npages &&
-                   need_npages <= chunk->max_frun_npages) {
+               assert(need_npages <= chunk->max_frun_npages);
+               {
                        arena_chunk_map_t *mapelm;
                        unsigned i;
                        unsigned max_frun_npages = 0;
@@ -1956,7 +2032,9 @@ arena_run_alloc(arena_t *arena, size_t s
                         * chunk->min_frun_ind was already reset above (if
                         * necessary).
                        */
+                       RB_REMOVE(arena_chunk_tree_s, &arena->chunks, chunk);
                        chunk->max_frun_npages = max_frun_npages;
+                       RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
                }
        }
 
@@ -2039,8 +2117,11 @@ arena_run_dalloc(arena_t *arena, arena_r
                    assert(chunk->map[run_ind + run_pages - 1].pos ==
                    POS_FREE);
        }
 
-       if (chunk->map[run_ind].npages > chunk->max_frun_npages)
+       if (chunk->map[run_ind].npages > chunk->max_frun_npages) {
+               RB_REMOVE(arena_chunk_tree_s, &arena->chunks, chunk);
                chunk->max_frun_npages = chunk->map[run_ind].npages;
+               RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
+       }
        if (run_ind < chunk->min_frun_ind)
                chunk->min_frun_ind = run_ind;
@@ -3218,16 +3299,17 @@ malloc_print_stats(void)
            opt_xmalloc ? "X" : "x",
            opt_zero ? "Z\n" : "z\n");
 
-               _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
-               _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
-               _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
+               _malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", "");
+               _malloc_message("Max arenas: ", size_t2s(narenas, s), "\n", "");
+               _malloc_message("Pointer size: ", size_t2s(sizeof(void *), s),
                    "\n", "");
-               _malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
-               _malloc_message("Max small size: ", umax2s(small_max, s), "\n",
+               _malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", "");
+               _malloc_message("Max small size: ", size_t2s(small_max, s), "\n",
                    "");
-               _malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
-               _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
+               _malloc_message("Chunk size: ", size_t2s(chunksize, s), "", "");
+               _malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s),
+                   ")\n", "");
 
 #ifdef MALLOC_STATS
                {
@@ -3325,6 +3407,7 @@ malloc_init_hard(void)
        ssize_t linklen;
        char buf[PATH_MAX + 1];
        const char *opts = "";
+       int serrno;
 
        malloc_mutex_lock(&init_lock);
        if (malloc_initialized) {
@@ -3336,6 +3419,7 @@ malloc_init_hard(void)
                return (false);
        }
 
+       serrno = errno;
        /* Get number of CPUs. */
        {
                int mib[2];
@@ -3515,6 +3599,7 @@ malloc_init_hard(void)
                        }
                }
        }
+       errno = serrno;
 
        /* Take care to call atexit() only once. */
        if (opt_print_stats) {
@@ -3613,11 +3698,6 @@ malloc_init_hard(void)
                opt_narenas_lshift += 2;
        }
 
-#ifdef NO_TLS
-       /* Initialize arena key. */
-       (void)thr_keycreate(&arenas_map_key, NULL);
-#endif
-
        /* Determine how many arenas to use. */
        narenas = ncpus;
        if (opt_narenas_lshift > 0) {
@@ -3912,7 +3992,6 @@ _malloc_prefork(void)
                if (arenas[i] != NULL)
                        malloc_mutex_lock(&arenas[i]->mtx);
        }
-       malloc_mutex_unlock(&arenas_mtx);
 
        malloc_mutex_lock(&base_mtx);
@@ -3930,7 +4009,6 @@ _malloc_postfork(void)
 
        malloc_mutex_unlock(&base_mtx);
 
-       malloc_mutex_lock(&arenas_mtx);
        for (i = 0; i < narenas; i++) {
                if (arenas[i] != NULL)
                        malloc_mutex_unlock(&arenas[i]->mtx);