Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.

===================================================================
RCS file: /ftp/cvs/cvsroot/src/lib/libc/stdlib/jemalloc.c,v
rcsdiff: /ftp/cvs/cvsroot/src/lib/libc/stdlib/jemalloc.c,v: warning: Unknown phrases like `commitid ...;' are present.
retrieving revision 1.19
retrieving revision 1.32
diff -u -p -r1.19 -r1.32
--- src/lib/libc/stdlib/jemalloc.c	2008/06/23 10:46:25	1.19
+++ src/lib/libc/stdlib/jemalloc.c	2014/02/25 12:13:19	1.32
@@ -1,4 +1,4 @@
-/*	$NetBSD: jemalloc.c,v 1.19 2008/06/23 10:46:25 ad Exp $	*/
+/*	$NetBSD: jemalloc.c,v 1.32 2014/02/25 12:13:19 martin Exp $	*/
 
 /*-
  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
@@ -118,7 +118,7 @@
 
 #include <sys/cdefs.h>
 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
-__RCSID("$NetBSD: jemalloc.c,v 1.19 2008/06/23 10:46:25 ad Exp $");
+__RCSID("$NetBSD: jemalloc.c,v 1.32 2014/02/25 12:13:19 martin Exp $");
 
 #ifdef __FreeBSD__
 #include "libc_private.h"
@@ -216,6 +216,14 @@ __strerror_r(int e, char *s, size_t l)
 #define	STRERROR_BUF		64
 
 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
+
+/*
+ * If you touch the TINY_MIN_2POW definition for any architecture, please
+ * make sure to adjust the corresponding definition for JEMALLOC_TINY_MIN_2POW
+ * in the gcc 4.8 tree in dist/gcc/tree-ssa-ccp.c and verify that a native
+ * gcc is still buildable!
+ */
+
 #ifdef __i386__
 #  define QUANTUM_2POW_MIN	4
 #  define SIZEOF_PTR_2POW	2
@@ -228,27 +236,34 @@ __strerror_r(int e, char *s, size_t l)
 #ifdef __alpha__
 #  define QUANTUM_2POW_MIN	4
 #  define SIZEOF_PTR_2POW	3
+#  define TINY_MIN_2POW		3
 #  define NO_TLS
 #endif
 #ifdef __sparc64__
 #  define QUANTUM_2POW_MIN	4
 #  define SIZEOF_PTR_2POW	3
+#  define TINY_MIN_2POW		3
 #  define NO_TLS
 #endif
 #ifdef __amd64__
 #  define QUANTUM_2POW_MIN	4
 #  define SIZEOF_PTR_2POW	3
+#  define TINY_MIN_2POW		3
 #endif
 #ifdef __arm__
 #  define QUANTUM_2POW_MIN	3
 #  define SIZEOF_PTR_2POW	2
 #  define USE_BRK
+#  ifdef __ARM_EABI__
+#    define TINY_MIN_2POW	3
+#  endif
 #  define NO_TLS
 #endif
 #ifdef __powerpc__
 #  define QUANTUM_2POW_MIN	4
 #  define SIZEOF_PTR_2POW	2
 #  define USE_BRK
+#  define TINY_MIN_2POW		3
 #endif
 #if defined(__sparc__) && !defined(__sparc64__)
 #  define QUANTUM_2POW_MIN	4
@@ -288,11 +303,6 @@ __strerror_r(int e, char *s, size_t l)
 #  define SIZEOF_INT_2POW	2
 #endif
 
-/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
-#if (!defined(PIC) && !defined(NO_TLS))
-#  define NO_TLS
-#endif
-
 /*
  * Size and alignment of memory chunks that are allocated by the OS's virtual
  * memory system.
 */
@@ -308,7 +318,9 @@ __strerror_r(int e, char *s, size_t l)
 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
 
 /* Smallest size class to support. */
-#define	TINY_MIN_2POW		1
+#ifndef TINY_MIN_2POW
+#define	TINY_MIN_2POW		2
+#endif
 
 /*
  * Maximum size class that is a multiple of the quantum, but not (necessarily)
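The hunks above raise the smallest size class: TINY_MIN_2POW becomes 3 (an 8-byte minimum) on the 64-bit architectures and on ARM EABI, and the default grows from 1 to 2 elsewhere. As a minimal illustrative sketch -- not the libc code; pow2_ceil() and tiny_size_class() are hypothetical helpers -- this is how such a floor is typically applied when rounding a tiny request to its size class:

#include <stddef.h>
#include <stdint.h>

#define TINY_MIN_2POW	3	/* assume the LP64 value: 8-byte minimum */

/* Round up to the next power of two; x must be at least 1. */
static size_t
pow2_ceil(size_t x)
{

	x--;
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
#if SIZE_MAX > 0xffffffffU
	x |= x >> 32;
#endif
	x++;
	return (x);
}

/* Map a tiny request to a power-of-two size class, honoring the floor. */
static size_t
tiny_size_class(size_t size)
{

	size = pow2_ceil(size);
	if (size < ((size_t)1 << TINY_MIN_2POW))
		size = (size_t)1 << TINY_MIN_2POW;
	return (size);
}

With TINY_MIN_2POW = 3, tiny_size_class(1) through tiny_size_class(8) all return 8, so every region is large enough and sufficiently aligned to hold a pointer.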
@@ -319,20 +331,25 @@
 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
 
 /*
- * Maximum desired run header overhead.  Runs are sized as small as possible
- * such that this setting is still honored, without violating other constraints.
- * The goal is to make runs as small as possible without exceeding a per run
- * external fragmentation threshold.
+ * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
+ * as small as possible such that this setting is still honored, without
+ * violating other constraints.  The goal is to make runs as small as possible
+ * without exceeding a per run external fragmentation threshold.
  *
- * Note that it is possible to set this low enough that it cannot be honored
- * for some/all object sizes, since there is one bit of header overhead per
- * object (plus a constant).  In such cases, this constraint is relaxed.
+ * We use binary fixed point math for overhead computations, where the binary
+ * point is implicitly RUN_BFP bits to the left.
  *
- * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
- * overhead for which RUN_MAX_OVRHD is relaxed.
+ * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
+ * honored for some/all object sizes, since there is one bit of header overhead
+ * per object (plus a constant).  This constraint is relaxed (ignored) for runs
+ * that are so small that the per-region overhead is greater than:
+ *
+ *	(RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
 */
-#define RUN_MAX_OVRHD		0.015
-#define RUN_MAX_OVRHD_RELAX	1.5
+#define	RUN_BFP			12
+/*					\/	Implicit binary fixed point. */
+#define	RUN_MAX_OVRHD		0x0000003dU
+#define	RUN_MAX_OVRHD_RELAX	0x00001800U
 
 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
 #define RUN_MAX_SMALL_2POW	15
@@ -811,7 +828,7 @@ static void	wrtmessage(const char *p1, c
 #ifdef MALLOC_STATS
 static void	malloc_printf(const char *format, ...);
 #endif
-static char	*umax2s(uintmax_t x, char *s);
+static char	*size_t2s(size_t x, char *s);
 static bool	base_pages_alloc(size_t minsize);
 static void	*base_alloc(size_t size);
 static chunk_node_t *base_chunk_node_alloc(void);
@@ -973,19 +990,19 @@ malloc_printf(const char *format, ...)
 
 /*
  * We don't want to depend on vsnprintf() for production builds, since that can
- * cause unnecessary bloat for static binaries.  umax2s() provides minimal
+ * cause unnecessary bloat for static binaries.  size_t2s() provides minimal
  * integer printing functionality, so that malloc_printf() use can be limited to
  * MALLOC_STATS code.
  */
 #define UMAX2S_BUFSIZE	21
 static char *
-umax2s(uintmax_t x, char *s)
+size_t2s(size_t x, char *s)
 {
 	unsigned i;
 
 	/* Make sure UMAX2S_BUFSIZE is large enough. */
 	/* LINTED */
-	assert(sizeof(uintmax_t) <= 8);
+	assert(sizeof(size_t) <= 8);
 
 	i = UMAX2S_BUFSIZE - 1;
 	s[i] = '\0';
@@ -1029,7 +1046,8 @@ base_pages_alloc(size_t minsize)
 		 */
 		incr = (intptr_t)chunksize
 		    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
-		if (incr < minsize)
+		assert(incr >= 0);
+		if ((size_t)incr < minsize)
 			incr += csize;
 
 		brk_prev = sbrk(incr);
@@ -1364,7 +1382,7 @@ chunk_alloc(size_t size)
 			 */
 			incr = (intptr_t)size
 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
-			if (incr == size) {
+			if (incr == (intptr_t)size) {
 				ret = brk_cur;
 			} else {
 				ret = (void *)((intptr_t)brk_cur + incr);
@@ -2142,7 +2160,6 @@ arena_bin_run_size_calc(arena_bin_t *bin
 	size_t try_run_size, good_run_size;
 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
-	float max_ovrhd = RUN_MAX_OVRHD;
 
 	assert(min_run_size >= pagesize);
 	assert(min_run_size <= arena_maxclass);
@@ -2160,7 +2177,7 @@ arena_bin_run_size_calc(arena_bin_t *bin
 	 */
 	try_run_size = min_run_size;
 	try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
-	    bin->reg_size) + 1); /* Counter-act the first line of the loop. */
+	    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
 	do {
 		try_nregs--;
 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
@@ -2194,9 +2211,8 @@ arena_bin_run_size_calc(arena_bin_t *bin
 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
 		    (try_mask_nelms - 1)) > try_reg0_offset);
 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
-	    && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
-	    && ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
-	    max_ovrhd);
+	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
+	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
 
 	assert(sizeof(arena_run_t) + (sizeof(unsigned) *
 	    (good_mask_nelms - 1)) <= good_reg0_offset);
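The arena_bin_run_size_calc() hunks above complete the conversion announced by RUN_BFP: the float thresholds become binary fixed-point integers with 12 fractional bits, so RUN_MAX_OVRHD = 0x3d encodes 61/4096 ~= 0.0149 (close to the old 0.015) and RUN_MAX_OVRHD_RELAX = 0x1800 encodes 6144/4096 = 1.5. A minimal sketch of the rewritten comparisons (the helper names are hypothetical; the arithmetic matches the new loop condition):

#include <stdbool.h>
#include <stddef.h>

#define RUN_BFP			12
#define RUN_MAX_OVRHD		0x0000003dU
#define RUN_MAX_OVRHD_RELAX	0x00001800U

/*
 * True if reg0_offset/run_size, the fraction of the run consumed by
 * the header, exceeds 61/4096.  Equivalent to the old float test
 * (double)reg0_offset / run_size > 0.015, but integer-only.
 */
static bool
run_overhead_too_high(size_t reg0_offset, size_t run_size)
{

	return ((reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * run_size);
}

/*
 * True if the relaxation applies: regions so small that one bit of
 * header per region already exceeds the allowed overhead.  This is
 * RUN_MAX_OVRHD <= RUN_MAX_OVRHD_RELAX / (reg_size << 3), written
 * without division, as in the new while condition above.
 */
static bool
run_overhead_relaxed(size_t reg_size)
{

	return (RUN_MAX_OVRHD * (reg_size << 3) <= RUN_MAX_OVRHD_RELAX);
}

The sizing loop keeps growing the trial run while run_overhead_relaxed() is false and run_overhead_too_high() is true, exactly mirroring the two new clauses of the while condition.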
@@ -2855,25 +2871,38 @@ huge_ralloc(void *ptr, size_t size, size
 			/* size_t wrap-around */
 			return (NULL);
 		}
+
+		/*
+		 * Remove the old region from the tree now.  If mremap()
+		 * returns the region to the system, other thread may
+		 * map it for same huge allocation and insert it to the
+		 * tree before we acquire the mutex lock again.
+		 */
+		malloc_mutex_lock(&chunks_mtx);
+		key.chunk = __DECONST(void *, ptr);
+		/* LINTED */
+		node = RB_FIND(chunk_tree_s, &huge, &key);
+		assert(node != NULL);
+		assert(node->chunk == ptr);
+		assert(node->size == oldcsize);
+		RB_REMOVE(chunk_tree_s, &huge, node);
+		malloc_mutex_unlock(&chunks_mtx);
+
 		newptr = mremap(ptr, oldcsize, NULL, newcsize,
 		    MAP_ALIGNED(chunksize_2pow));
-		if (newptr != MAP_FAILED) {
+		if (newptr == MAP_FAILED) {
+			/* We still own the old region. */
+			malloc_mutex_lock(&chunks_mtx);
+			RB_INSERT(chunk_tree_s, &huge, node);
+			malloc_mutex_unlock(&chunks_mtx);
+		} else {
 			assert(CHUNK_ADDR2BASE(newptr) == newptr);
 
-			/* update tree */
+			/* Insert new or resized old region. */
 			malloc_mutex_lock(&chunks_mtx);
-			key.chunk = __DECONST(void *, ptr);
-			/* LINTED */
-			node = RB_FIND(chunk_tree_s, &huge, &key);
-			assert(node != NULL);
-			assert(node->chunk == ptr);
-			assert(node->size == oldcsize);
 			node->size = newcsize;
-			if (ptr != newptr) {
-				RB_REMOVE(chunk_tree_s, &huge, node);
-				node->chunk = newptr;
-				RB_INSERT(chunk_tree_s, &huge, node);
-			}
+			node->chunk = newptr;
+			RB_INSERT(chunk_tree_s, &huge, node);
 #ifdef MALLOC_STATS
 			huge_nralloc++;
 			huge_allocated += newcsize - oldcsize;
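The huge_ralloc() hunk above closes a race window: the old code called mremap() first and updated the red-black tree of huge allocations afterwards, so once mremap() returned the old region to the system, another thread could map the same address range for a new huge allocation and insert its own node while the stale one was still in the tree. A minimal, self-contained sketch of the corrected ordering -- a mutex-protected list stands in for the tree, and try_remap() (here just realloc()) stands in for mremap(); none of this is the libc code:

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct entry {
	void		*chunk;
	size_t		 size;
	struct entry	*next;
};

static pthread_mutex_t	 table_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct entry	*table;		/* stand-in for the huge tree */

static void
table_remove(struct entry *e)
{
	struct entry **ep;

	pthread_mutex_lock(&table_mtx);
	for (ep = &table; *ep != e; ep = &(*ep)->next)
		continue;
	*ep = e->next;
	pthread_mutex_unlock(&table_mtx);
}

static void
table_insert(struct entry *e)
{

	pthread_mutex_lock(&table_mtx);
	e->next = table;
	table = e;
	pthread_mutex_unlock(&table_mtx);
}

/* Stand-in for mremap(): may move or release the old region. */
static void *
try_remap(void *ptr, size_t newsize)
{

	return (realloc(ptr, newsize));
}

static bool
entry_grow(struct entry *e, size_t newsize)
{
	void *newptr;

	/* Remove first: after try_remap() the old address may be reused. */
	table_remove(e);
	newptr = try_remap(e->chunk, newsize);
	if (newptr == NULL) {
		table_insert(e);	/* failed; we still own the region */
		return (false);
	}
	e->chunk = newptr;
	e->size = newsize;
	table_insert(e);		/* publish the new/resized region */
	return (true);
}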
"Z\n" : "z\n"); - _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", ""); - _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", ""); - _malloc_message("Pointer size: ", umax2s(sizeof(void *), s), + _malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", ""); + _malloc_message("Max arenas: ", size_t2s(narenas, s), "\n", ""); + _malloc_message("Pointer size: ", size_t2s(sizeof(void *), s), "\n", ""); - _malloc_message("Quantum size: ", umax2s(quantum, s), "\n", ""); - _malloc_message("Max small size: ", umax2s(small_max, s), "\n", + _malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", ""); + _malloc_message("Max small size: ", size_t2s(small_max, s), "\n", ""); - _malloc_message("Chunk size: ", umax2s(chunksize, s), "", ""); - _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", ""); + _malloc_message("Chunk size: ", size_t2s(chunksize, s), "", ""); + _malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s), + ")\n", ""); #ifdef MALLOC_STATS { @@ -3308,6 +3338,7 @@ malloc_init_hard(void) ssize_t linklen; char buf[PATH_MAX + 1]; const char *opts = ""; + int serrno; malloc_mutex_lock(&init_lock); if (malloc_initialized) { @@ -3319,6 +3350,7 @@ malloc_init_hard(void) return (false); } + serrno = errno; /* Get number of CPUs. */ { int mib[2]; @@ -3430,14 +3462,8 @@ malloc_init_hard(void) opt_chunk_2pow--; break; case 'K': - /* - * There must be fewer pages in a chunk than - * can be recorded by the pos field of - * arena_chunk_map_t, in order to make POS_FREE - * special. - */ - if (opt_chunk_2pow - pagesize_2pow - < (sizeof(uint32_t) << 3) - 1) + if (opt_chunk_2pow + 1 < + (int)(sizeof(size_t) << 3)) opt_chunk_2pow++; break; case 'n': @@ -3504,6 +3530,7 @@ malloc_init_hard(void) } } } + errno = serrno; /* Take care to call atexit() only once. */ if (opt_print_stats) {