Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.

===================================================================
RCS file: /ftp/cvs/cvsroot/src/lib/libc/stdlib/jemalloc.c,v
rcsdiff: /ftp/cvs/cvsroot/src/lib/libc/stdlib/jemalloc.c,v: warning: Unknown phrases like `commitid ...;' are present.
retrieving revision 1.2
retrieving revision 1.35
diff -u -p -r1.2 -r1.35
--- src/lib/libc/stdlib/jemalloc.c	2007/10/05 23:42:23	1.2
+++ src/lib/libc/stdlib/jemalloc.c	2014/09/03 19:29:40	1.35
@@ -1,4 +1,4 @@
-/*	$NetBSD: jemalloc.c,v 1.2 2007/10/05 23:42:23 ad Exp $	*/
+/*	$NetBSD: jemalloc.c,v 1.35 2014/09/03 19:29:40 matt Exp $	*/
 
 /*-
  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
@@ -118,7 +118,7 @@
 
 #include <sys/cdefs.h>
 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
-__RCSID("$NetBSD: jemalloc.c,v 1.2 2007/10/05 23:42:23 ad Exp $");
+__RCSID("$NetBSD: jemalloc.c,v 1.35 2014/09/03 19:29:40 matt Exp $");
 
 #ifdef __FreeBSD__
 #include "libc_private.h"
@@ -161,13 +161,33 @@ __RCSID("$NetBSD: jemalloc.c,v 1.2 2007/
 
 #ifdef __NetBSD__
 # include <reentrant.h>
-void	_malloc_prefork(void);
-void	_malloc_postfork(void);
-ssize_t	_write(int, const void *, size_t);
-const char	*_getprogname(void);
+# include "extern.h"
+
+#define STRERROR_R(a, b, c)	__strerror_r(a, b, c);
+/*
+ * A non localized version of strerror, that avoids bringing in
+ * stdio and the locale code. All the malloc messages are in English
+ * so why bother?
+ */
+static int
+__strerror_r(int e, char *s, size_t l)
+{
+	int rval;
+	size_t slen;
+
+	if (e >= 0 && e < sys_nerr) {
+		slen = strlcpy(s, sys_errlist[e], l);
+		rval = 0;
+	} else {
+		slen = snprintf_ss(s, l, "Unknown error %u", e);
+		rval = EINVAL;
+	}
+	return slen >= l ? ERANGE : rval;
+}
 #endif
 
 #ifdef __FreeBSD__
+#define STRERROR_R(a, b, c)	strerror_r(a, b, c);
 #include "un-namespace.h"
 #endif
 
@@ -196,6 +216,14 @@ const char	*_getprogname(void);
 #define	STRERROR_BUF		64
 
 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
+
+/*
+ * If you touch the TINY_MIN_2POW definition for any architecture, please
+ * make sure to adjust the corresponding definition for JEMALLOC_TINY_MIN_2POW
+ * in the gcc 4.8 tree in dist/gcc/tree-ssa-ccp.c and verify that a native
+ * gcc is still buildable!
+ */ + #ifdef __i386__ # define QUANTUM_2POW_MIN 4 # define SIZEOF_PTR_2POW 2 @@ -205,32 +233,49 @@ const char *_getprogname(void); # define QUANTUM_2POW_MIN 4 # define SIZEOF_PTR_2POW 3 #endif +#ifdef __aarch64__ +# define QUANTUM_2POW_MIN 4 +# define SIZEOF_PTR_2POW 3 +# define NO_TLS +#endif #ifdef __alpha__ # define QUANTUM_2POW_MIN 4 # define SIZEOF_PTR_2POW 3 +# define TINY_MIN_2POW 3 # define NO_TLS #endif #ifdef __sparc64__ # define QUANTUM_2POW_MIN 4 # define SIZEOF_PTR_2POW 3 +# define TINY_MIN_2POW 3 # define NO_TLS #endif #ifdef __amd64__ # define QUANTUM_2POW_MIN 4 # define SIZEOF_PTR_2POW 3 +# define TINY_MIN_2POW 3 #endif #ifdef __arm__ # define QUANTUM_2POW_MIN 3 # define SIZEOF_PTR_2POW 2 # define USE_BRK +# ifdef __ARM_EABI__ +# define TINY_MIN_2POW 3 +# endif # define NO_TLS #endif #ifdef __powerpc__ # define QUANTUM_2POW_MIN 4 # define SIZEOF_PTR_2POW 2 # define USE_BRK +# define TINY_MIN_2POW 3 #endif -#ifdef __sparc__ +#if defined(__sparc__) && !defined(__sparc64__) +# define QUANTUM_2POW_MIN 4 +# define SIZEOF_PTR_2POW 2 +# define USE_BRK +#endif +#ifdef __or1k__ # define QUANTUM_2POW_MIN 4 # define SIZEOF_PTR_2POW 2 # define USE_BRK @@ -255,6 +300,11 @@ const char *_getprogname(void); # define SIZEOF_PTR_2POW 2 # define USE_BRK #endif +#ifdef __hppa__ +# define QUANTUM_2POW_MIN 4 +# define SIZEOF_PTR_2POW 2 +# define USE_BRK +#endif #define SIZEOF_PTR (1 << SIZEOF_PTR_2POW) @@ -263,11 +313,6 @@ const char *_getprogname(void); # define SIZEOF_INT_2POW 2 #endif -/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */ -#if (!defined(PIC) && !defined(NO_TLS)) -# define NO_TLS -#endif - /* * Size and alignment of memory chunks that are allocated by the OS's virtual * memory system. @@ -283,7 +328,9 @@ const char *_getprogname(void); #define CACHELINE ((size_t)(1 << CACHELINE_2POW)) /* Smallest size class to support. */ -#define TINY_MIN_2POW 1 +#ifndef TINY_MIN_2POW +#define TINY_MIN_2POW 2 +#endif /* * Maximum size class that is a multiple of the quantum, but not (necessarily) @@ -294,20 +341,25 @@ const char *_getprogname(void); #define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT) /* - * Maximum desired run header overhead. Runs are sized as small as possible - * such that this setting is still honored, without violating other constraints. - * The goal is to make runs as small as possible without exceeding a per run - * external fragmentation threshold. + * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized + * as small as possible such that this setting is still honored, without + * violating other constraints. The goal is to make runs as small as possible + * without exceeding a per run external fragmentation threshold. + * + * We use binary fixed point math for overhead computations, where the binary + * point is implicitly RUN_BFP bits to the left. * - * Note that it is possible to set this low enough that it cannot be honored - * for some/all object sizes, since there is one bit of header overhead per - * object (plus a constant). In such cases, this constraint is relaxed. + * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be + * honored for some/all object sizes, since there is one bit of header overhead + * per object (plus a constant). This constraint is relaxed (ignored) for runs + * that are so small that the per-region overhead is greater than: * - * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of - * overhead for which RUN_MAX_OVRHD is relaxed. 
+ * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) */ -#define RUN_MAX_OVRHD 0.015 -#define RUN_MAX_OVRHD_RELAX 1.5 +#define RUN_BFP 12 +/* \/ Implicit binary fixed point. */ +#define RUN_MAX_OVRHD 0x0000003dU +#define RUN_MAX_OVRHD_RELAX 0x00001800U /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */ #define RUN_MAX_SMALL_2POW 15 @@ -620,7 +672,7 @@ static unsigned ncpus; /* VM page size. */ static size_t pagesize; static size_t pagesize_mask; -static size_t pagesize_2pow; +static int pagesize_2pow; /* Various bin-related settings. */ static size_t bin_maxclass; /* Max size class for bins. */ @@ -637,6 +689,7 @@ static size_t quantum_mask; /* (quantum /* Various chunk-related settings. */ static size_t chunksize; static size_t chunksize_mask; /* (chunksize - 1). */ +static int chunksize_2pow; static unsigned chunk_npages; static unsigned arena_chunk_header_npages; static size_t arena_maxclass; /* Max size class for arenas. */ @@ -674,6 +727,7 @@ static void *brk_max; /* Huge allocation statistics. */ static uint64_t huge_nmalloc; static uint64_t huge_ndalloc; +static uint64_t huge_nralloc; static size_t huge_allocated; #endif @@ -750,9 +804,9 @@ static bool opt_junk = false; #endif static bool opt_hint = false; static bool opt_print_stats = false; -static size_t opt_quantum_2pow = QUANTUM_2POW_MIN; -static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT; -static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT; +static int opt_quantum_2pow = QUANTUM_2POW_MIN; +static int opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT; +static int opt_chunk_2pow = CHUNK_2POW_DEFAULT; static bool opt_utrace = false; static bool opt_sysv = false; static bool opt_xmalloc = false; @@ -784,7 +838,7 @@ static void wrtmessage(const char *p1, c #ifdef MALLOC_STATS static void malloc_printf(const char *format, ...); #endif -static char *umax2s(uintmax_t x, char *s); +static char *size_t2s(size_t x, char *s); static bool base_pages_alloc(size_t minsize); static void *base_alloc(size_t size); static chunk_node_t *base_chunk_node_alloc(void); @@ -793,10 +847,10 @@ static void base_chunk_node_dealloc(chun static void stats_print(arena_t *arena); #endif static void *pages_map(void *addr, size_t size); +static void *pages_map_align(void *addr, size_t size, int align); static void pages_unmap(void *addr, size_t size); static void *chunk_alloc(size_t size); static void chunk_dealloc(void *chunk, size_t size); -static arena_t *choose_arena_hard(void); static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); @@ -918,10 +972,10 @@ static void wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4) { - _write(STDERR_FILENO, p1, strlen(p1)); - _write(STDERR_FILENO, p2, strlen(p2)); - _write(STDERR_FILENO, p3, strlen(p3)); - _write(STDERR_FILENO, p4, strlen(p4)); + write(STDERR_FILENO, p1, strlen(p1)); + write(STDERR_FILENO, p2, strlen(p2)); + write(STDERR_FILENO, p3, strlen(p3)); + write(STDERR_FILENO, p4, strlen(p4)); } void (*_malloc_message)(const char *p1, const char *p2, const char *p3, @@ -946,18 +1000,19 @@ malloc_printf(const char *format, ...) /* * We don't want to depend on vsnprintf() for production builds, since that can - * cause unnecessary bloat for static binaries. umax2s() provides minimal + * cause unnecessary bloat for static binaries. 
size_t2s() provides minimal * integer printing functionality, so that malloc_printf() use can be limited to * MALLOC_STATS code. */ #define UMAX2S_BUFSIZE 21 static char * -umax2s(uintmax_t x, char *s) +size_t2s(size_t x, char *s) { unsigned i; /* Make sure UMAX2S_BUFSIZE is large enough. */ - assert(sizeof(uintmax_t) <= 8); + /* LINTED */ + assert(sizeof(size_t) <= 8); i = UMAX2S_BUFSIZE - 1; s[i] = '\0'; @@ -1001,7 +1056,8 @@ base_pages_alloc(size_t minsize) */ incr = (intptr_t)chunksize - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur); - if (incr < minsize) + assert(incr >= 0); + if ((size_t)incr < minsize) incr += csize; brk_prev = sbrk(incr); @@ -1176,6 +1232,7 @@ stats_print(arena_t *arena) * Begin chunk management functions. */ +#ifndef lint static inline int chunk_comp(chunk_node_t *a, chunk_node_t *b) { @@ -1192,12 +1249,11 @@ chunk_comp(chunk_node_t *a, chunk_node_t } /* Generate red-black tree code for chunks. */ -#ifndef lint RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp); #endif static void * -pages_map(void *addr, size_t size) +pages_map_align(void *addr, size_t size, int align) { void *ret; @@ -1205,8 +1261,8 @@ pages_map(void *addr, size_t size) * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. */ - ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, - -1, 0); + ret = mmap(addr, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0); assert(ret != NULL); if (ret == MAP_FAILED) @@ -1218,8 +1274,8 @@ pages_map(void *addr, size_t size) if (munmap(ret, size) == -1) { char buf[STRERROR_BUF]; - strerror_r(errno, buf, sizeof(buf)); - _malloc_message(_getprogname(), + STRERROR_R(errno, buf, sizeof(buf)); + _malloc_message(getprogname(), ": (malloc) Error in munmap(): ", buf, "\n"); if (opt_abort) abort(); @@ -1232,6 +1288,13 @@ pages_map(void *addr, size_t size) return (ret); } +static void * +pages_map(void *addr, size_t size) +{ + + return pages_map_align(addr, size, 0); +} + static void pages_unmap(void *addr, size_t size) { @@ -1239,8 +1302,8 @@ pages_unmap(void *addr, size_t size) if (munmap(addr, size) == -1) { char buf[STRERROR_BUF]; - strerror_r(errno, buf, sizeof(buf)); - _malloc_message(_getprogname(), + STRERROR_R(errno, buf, sizeof(buf)); + _malloc_message(getprogname(), ": (malloc) Error in munmap(): ", buf, "\n"); if (opt_abort) abort(); @@ -1299,27 +1362,8 @@ chunk_alloc(size_t size) * anywhere. Beware of size_t wrap-around. */ if (size + chunksize > size) { - if ((ret = pages_map(NULL, size + chunksize)) != NULL) { - size_t offset = CHUNK_ADDR2OFFSET(ret); - - /* - * Success. Clean up unneeded leading/trailing space. - */ - if (offset != 0) { - /* Leading space. */ - pages_unmap(ret, chunksize - offset); - - ret = (void *)((uintptr_t)ret + (chunksize - - offset)); - - /* Trailing space. */ - pages_unmap((void *)((uintptr_t)ret + size), - offset); - } else { - /* Trailing space only. 
*/ - pages_unmap((void *)((uintptr_t)ret + size), - chunksize); - } + if ((ret = pages_map_align(NULL, size, chunksize_2pow)) + != NULL) { goto RETURN; } } @@ -1348,7 +1392,7 @@ chunk_alloc(size_t size) */ incr = (intptr_t)size - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur); - if (incr == size) { + if (incr == (intptr_t)size) { ret = brk_cur; } else { ret = (void *)((intptr_t)brk_cur + incr); @@ -1506,68 +1550,57 @@ chunk_dealloc(void *chunk, size_t size) */ /* - * Choose an arena based on a per-thread value (fast-path code, calls slow-path - * code if necessary). + * Choose an arena based on a per-thread and (optimistically) per-CPU value. + * + * We maintain at least one block of arenas. Usually there are more. + * The blocks are $ncpu arenas in size. Whole blocks are 'hashed' + * amongst threads. To accomplish this, next_arena advances only in + * ncpu steps. */ -static inline arena_t * -choose_arena(void) +static __noinline arena_t * +choose_arena_hard(void) { - arena_t *ret; + unsigned i, curcpu; + arena_t **map; - /* - * We can only use TLS if this is a PIC library, since for the static - * library version, libc's malloc is used by TLS allocation, which - * introduces a bootstrapping issue. - */ - if (__isthreaded == false) { - /* - * Avoid the overhead of TLS for single-threaded operation. If the - * app switches to threaded mode, the initial thread may end up - * being assigned to some other arena, but this one-time switch - * shouldn't cause significant issues. - */ - return (arenas[0]); + /* Initialize the current block of arenas and advance to next. */ + malloc_mutex_lock(&arenas_mtx); + assert(next_arena % ncpus == 0); + assert(narenas % ncpus == 0); + map = &arenas[next_arena]; + set_arenas_map(map); + for (i = 0; i < ncpus; i++) { + if (arenas[next_arena] == NULL) + arenas_extend(next_arena); + next_arena = (next_arena + 1) % narenas; } + malloc_mutex_unlock(&arenas_mtx); - ret = get_arenas_map(); - if (ret == NULL) - ret = choose_arena_hard(); - - assert(ret != NULL); - return (ret); + /* + * If we were unable to allocate an arena above, then default to + * the first arena, which is always present. + */ + curcpu = thr_curcpu(); + if (map[curcpu] != NULL) + return map[curcpu]; + return arenas[0]; } -/* - * Choose an arena based on a per-thread value (slow-path code only, called - * only by choose_arena()). - */ -static arena_t * -choose_arena_hard(void) +static inline arena_t * +choose_arena(void) { - arena_t *ret; + unsigned curcpu; + arena_t **map; - assert(__isthreaded); + map = get_arenas_map(); + curcpu = thr_curcpu(); + if (__predict_true(map != NULL && map[curcpu] != NULL)) + return map[curcpu]; - /* Assign one of the arenas to this thread, in a round-robin fashion. */ - malloc_mutex_lock(&arenas_mtx); - ret = arenas[next_arena]; - if (ret == NULL) - ret = arenas_extend(next_arena); - if (ret == NULL) { - /* - * Make sure that this function never returns NULL, so that - * choose_arena() doesn't have to check for a NULL return - * value. - */ - ret = arenas[0]; - } - next_arena = (next_arena + 1) % narenas; - malloc_mutex_unlock(&arenas_mtx); - set_arenas_map(ret); - - return (ret); + return choose_arena_hard(); } +#ifndef lint static inline int arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b) { @@ -1584,10 +1617,10 @@ arena_chunk_comp(arena_chunk_t *a, arena } /* Generate red-black tree code for arena chunks. 
*/ -#ifndef lint RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp); #endif +#ifndef lint static inline int arena_run_comp(arena_run_t *a, arena_run_t *b) { @@ -1604,7 +1637,6 @@ arena_run_comp(arena_run_t *a, arena_run } /* Generate red-black tree code for arena runs. */ -#ifndef lint RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp); #endif @@ -1663,6 +1695,7 @@ arena_run_reg_alloc(arena_run_t *run, ar } } /* Not reached. */ + /* LINTED */ assert(0); return (NULL); } @@ -1705,6 +1738,7 @@ arena_run_reg_dalloc(arena_run_t *run, a }; unsigned diff, regind, elm, bit; + /* LINTED */ assert(run->magic == ARENA_RUN_MAGIC); assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3 >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN)); @@ -1741,7 +1775,7 @@ arena_run_reg_dalloc(arena_run_t *run, a * The page size is too large for us to use the lookup * table. Use real division. */ - regind = diff / size; + regind = (unsigned)(diff / size); } } else if (size <= ((sizeof(size_invs) / sizeof(unsigned)) << QUANTUM_2POW_MIN) + 2) { @@ -1754,7 +1788,7 @@ arena_run_reg_dalloc(arena_run_t *run, a * if the user increases small_max via the 'S' runtime * configuration option. */ - regind = diff / size; + regind = (unsigned)(diff / size); }; assert(diff == regind * size); assert(regind < bin->nregs); @@ -1780,7 +1814,7 @@ arena_run_split(arena_t *arena, arena_ru run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> pagesize_2pow); total_pages = chunk->map[run_ind].npages; - need_pages = (size >> pagesize_2pow); + need_pages = (unsigned)(size >> pagesize_2pow); assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; @@ -1895,7 +1929,7 @@ arena_run_alloc(arena_t *arena, size_t s * Search through arena's chunks in address order for a free run that is * large enough. Look for the first fit. */ - need_npages = (size >> pagesize_2pow); + need_npages = (unsigned)(size >> pagesize_2pow); limit_pages = chunk_npages - arena_chunk_header_npages; compl_need_npages = limit_pages - need_npages; /* LINTED */ @@ -1973,7 +2007,7 @@ arena_run_dalloc(arena_t *arena, arena_r >> pagesize_2pow); assert(run_ind >= arena_chunk_header_npages); assert(run_ind < (chunksize >> pagesize_2pow)); - run_pages = (size >> pagesize_2pow); + run_pages = (unsigned)(size >> pagesize_2pow); assert(run_pages == chunk->map[run_ind].npages); /* Subtract pages from count of pages used in chunk. */ @@ -2136,7 +2170,6 @@ arena_bin_run_size_calc(arena_bin_t *bin size_t try_run_size, good_run_size; unsigned good_nregs, good_mask_nelms, good_reg0_offset; unsigned try_nregs, try_mask_nelms, try_reg0_offset; - float max_ovrhd = RUN_MAX_OVRHD; assert(min_run_size >= pagesize); assert(min_run_size <= arena_maxclass); @@ -2153,13 +2186,14 @@ arena_bin_run_size_calc(arena_bin_t *bin * header's mask length and the number of regions. */ try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size) - + 1; /* Counter-act the first line of the loop. */ + try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) / + bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */ do { try_nregs--; try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) + ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 
1 : 0); - try_reg0_offset = try_run_size - (try_nregs * bin->reg_size); + try_reg0_offset = (unsigned)(try_run_size - + (try_nregs * bin->reg_size)); } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1)) > try_reg0_offset); @@ -2175,21 +2209,20 @@ arena_bin_run_size_calc(arena_bin_t *bin /* Try more aggressive settings. */ try_run_size += pagesize; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */ + try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) / + bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */ do { try_nregs--; try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) + ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0); - try_reg0_offset = try_run_size - (try_nregs * - bin->reg_size); + try_reg0_offset = (unsigned)(try_run_size - (try_nregs * + bin->reg_size)); } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1)) > try_reg0_offset); } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL - && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3)) - && ((float)(try_reg0_offset)) / ((float)(try_run_size)) > - max_ovrhd); + && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX + && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size); assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1)) <= good_reg0_offset); @@ -2319,7 +2352,7 @@ arena_palloc(arena_t *arena, size_t alig assert((size & pagesize_mask) == 0); assert((alignment & pagesize_mask) == 0); - npages = size >> pagesize_2pow; + npages = (unsigned)(size >> pagesize_2pow); malloc_mutex_lock(&arena->mtx); ret = (void *)arena_run_alloc(arena, alloc_size); @@ -2334,7 +2367,7 @@ arena_palloc(arena_t *arena, size_t alig assert((offset & pagesize_mask) == 0); assert(offset < alloc_size); if (offset == 0) { - pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> + pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >> pagesize_2pow); /* Update the map for the run to be kept. */ @@ -2345,13 +2378,13 @@ arena_palloc(arena_t *arena, size_t alig /* Trim trailing space. */ arena_palloc_trim(arena, chunk, pageind + npages, - (alloc_size - size) >> pagesize_2pow); + (unsigned)((alloc_size - size) >> pagesize_2pow)); } else { size_t leadsize, trailsize; leadsize = alignment - offset; ret = (void *)((uintptr_t)ret + leadsize); - pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> + pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >> pagesize_2pow); /* Update the map for the run to be kept. */ @@ -2361,15 +2394,16 @@ arena_palloc(arena_t *arena, size_t alig } /* Trim leading space. */ - arena_palloc_trim(arena, chunk, pageind - (leadsize >> - pagesize_2pow), leadsize >> pagesize_2pow); + arena_palloc_trim(arena, chunk, + (unsigned)(pageind - (leadsize >> pagesize_2pow)), + (unsigned)(leadsize >> pagesize_2pow)); trailsize = alloc_size - leadsize - size; if (trailsize != 0) { /* Trim trailing space. */ assert(trailsize < alloc_size); arena_palloc_trim(arena, chunk, pageind + npages, - trailsize >> pagesize_2pow); + (unsigned)(trailsize >> pagesize_2pow)); } } @@ -2403,7 +2437,8 @@ arena_salloc(const void *ptr) * affects this function, so we don't need to lock. 
*/ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow); + pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >> + pagesize_2pow); mapelm = &chunk->map[pageind]; if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind << pagesize_2pow)) { @@ -2483,7 +2518,8 @@ arena_dalloc(arena_t *arena, arena_chunk assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow); + pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >> + pagesize_2pow); mapelm = &chunk->map[pageind]; if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind << pagesize_2pow)) { @@ -2667,7 +2703,7 @@ arenas_extend(unsigned ind) * by using arenas[0]. In practice, this is an extremely unlikely * failure. */ - _malloc_message(_getprogname(), + _malloc_message(getprogname(), ": (malloc) Error initializing arena\n", "", ""); if (opt_abort) abort(); @@ -2827,6 +2863,84 @@ huge_ralloc(void *ptr, size_t size, size return (ptr); } + if (CHUNK_ADDR2BASE(ptr) == ptr +#ifdef USE_BRK + && ((uintptr_t)ptr < (uintptr_t)brk_base + || (uintptr_t)ptr >= (uintptr_t)brk_max) +#endif + ) { + chunk_node_t *node, key; + void *newptr; + size_t oldcsize; + size_t newcsize; + + newcsize = CHUNK_CEILING(size); + oldcsize = CHUNK_CEILING(oldsize); + assert(oldcsize != newcsize); + if (newcsize == 0) { + /* size_t wrap-around */ + return (NULL); + } + + /* + * Remove the old region from the tree now. If mremap() + * returns the region to the system, other thread may + * map it for same huge allocation and insert it to the + * tree before we acquire the mutex lock again. + */ + malloc_mutex_lock(&chunks_mtx); + key.chunk = __DECONST(void *, ptr); + /* LINTED */ + node = RB_FIND(chunk_tree_s, &huge, &key); + assert(node != NULL); + assert(node->chunk == ptr); + assert(node->size == oldcsize); + RB_REMOVE(chunk_tree_s, &huge, node); + malloc_mutex_unlock(&chunks_mtx); + + newptr = mremap(ptr, oldcsize, NULL, newcsize, + MAP_ALIGNED(chunksize_2pow)); + if (newptr == MAP_FAILED) { + /* We still own the old region. */ + malloc_mutex_lock(&chunks_mtx); + RB_INSERT(chunk_tree_s, &huge, node); + malloc_mutex_unlock(&chunks_mtx); + } else { + assert(CHUNK_ADDR2BASE(newptr) == newptr); + + /* Insert new or resized old region. */ + malloc_mutex_lock(&chunks_mtx); + node->size = newcsize; + node->chunk = newptr; + RB_INSERT(chunk_tree_s, &huge, node); +#ifdef MALLOC_STATS + huge_nralloc++; + huge_allocated += newcsize - oldcsize; + if (newcsize > oldcsize) { + stats_chunks.curchunks += + (newcsize - oldcsize) / chunksize; + if (stats_chunks.curchunks > + stats_chunks.highchunks) + stats_chunks.highchunks = + stats_chunks.curchunks; + } else { + stats_chunks.curchunks -= + (oldcsize - newcsize) / chunksize; + } +#endif + malloc_mutex_unlock(&chunks_mtx); + + if (opt_junk && size < oldsize) { + memset((void *)((uintptr_t)newptr + size), 0x5a, + newcsize - size); + } else if (opt_zero && size > oldsize) { + memset((void *)((uintptr_t)newptr + oldsize), 0, + size - oldsize); + } + return (newptr); + } + } + /* * If we get here, then size and oldsize are different enough that we * need to use a different size class. In that case, fall back to @@ -3126,16 +3240,17 @@ malloc_print_stats(void) opt_xmalloc ? "X" : "x", opt_zero ? 
"Z\n" : "z\n"); - _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", ""); - _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", ""); - _malloc_message("Pointer size: ", umax2s(sizeof(void *), s), + _malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", ""); + _malloc_message("Max arenas: ", size_t2s(narenas, s), "\n", ""); + _malloc_message("Pointer size: ", size_t2s(sizeof(void *), s), "\n", ""); - _malloc_message("Quantum size: ", umax2s(quantum, s), "\n", ""); - _malloc_message("Max small size: ", umax2s(small_max, s), "\n", + _malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", ""); + _malloc_message("Max small size: ", size_t2s(small_max, s), "\n", ""); - _malloc_message("Chunk size: ", umax2s(chunksize, s), "", ""); - _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", ""); + _malloc_message("Chunk size: ", size_t2s(chunksize, s), "", ""); + _malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s), + ")\n", ""); #ifdef MALLOC_STATS { @@ -3188,10 +3303,11 @@ malloc_print_stats(void) /* Print chunk stats. */ malloc_printf( - "huge: nmalloc ndalloc allocated\n"); - malloc_printf(" %12llu %12llu %12zu\n", - huge_nmalloc, huge_ndalloc, huge_allocated - * chunksize); + "huge: nmalloc ndalloc " + "nralloc allocated\n"); + malloc_printf(" %12llu %12llu %12llu %12zu\n", + huge_nmalloc, huge_ndalloc, huge_nralloc, + huge_allocated); /* Print stats for each arena. */ for (i = 0; i < narenas; i++) { @@ -3229,9 +3345,10 @@ static bool malloc_init_hard(void) { unsigned i, j; - int linklen; + ssize_t linklen; char buf[PATH_MAX + 1]; const char *opts = ""; + int serrno; malloc_mutex_lock(&init_lock); if (malloc_initialized) { @@ -3243,6 +3360,7 @@ malloc_init_hard(void) return (false); } + serrno = errno; /* Get number of CPUs. */ { int mib[2]; @@ -3293,8 +3411,8 @@ malloc_init_hard(void) } break; case 1: - if (issetugid() == 0 && (opts = - getenv("MALLOC_OPTIONS")) != NULL) { + if ((opts = getenv("MALLOC_OPTIONS")) != NULL && + issetugid() == 0) { /* * Do nothing; opts is already initialized to * the value of the MALLOC_OPTIONS environment @@ -3320,6 +3438,7 @@ malloc_init_hard(void) break; default: /* NOTREACHED */ + /* LINTED */ assert(false); } @@ -3353,14 +3472,8 @@ malloc_init_hard(void) opt_chunk_2pow--; break; case 'K': - /* - * There must be fewer pages in a chunk than - * can be recorded by the pos field of - * arena_chunk_map_t, in order to make POS_FREE - * special. - */ - if (opt_chunk_2pow - pagesize_2pow - < (sizeof(uint32_t) << 3) - 1) + if (opt_chunk_2pow + 1 < + (int)(sizeof(size_t) << 3)) opt_chunk_2pow++; break; case 'n': @@ -3420,13 +3533,14 @@ malloc_init_hard(void) cbuf[0] = opts[j]; cbuf[1] = '\0'; - _malloc_message(_getprogname(), + _malloc_message(getprogname(), ": (malloc) Unsupported character in " "malloc options: '", cbuf, "'\n"); } } } } + errno = serrno; /* Take care to call atexit() only once. */ if (opt_print_stats) { @@ -3442,10 +3556,10 @@ malloc_init_hard(void) /* Set bin-related variables. */ bin_maxclass = (pagesize >> 1); assert(opt_quantum_2pow >= TINY_MIN_2POW); - ntbins = opt_quantum_2pow - TINY_MIN_2POW; + ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW); assert(ntbins <= opt_quantum_2pow); - nqbins = (small_max >> opt_quantum_2pow); - nsbins = pagesize_2pow - opt_small_max_2pow - 1; + nqbins = (unsigned)(small_max >> opt_quantum_2pow); + nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1); /* Set variables according to the value of opt_quantum_2pow. 
*/ quantum = (1 << opt_quantum_2pow); @@ -3459,12 +3573,13 @@ malloc_init_hard(void) /* Set variables according to the value of opt_chunk_2pow. */ chunksize = (1LU << opt_chunk_2pow); chunksize_mask = chunksize - 1; - chunk_npages = (chunksize >> pagesize_2pow); + chunksize_2pow = (unsigned)opt_chunk_2pow; + chunk_npages = (unsigned)(chunksize >> pagesize_2pow); { unsigned header_size; - header_size = sizeof(arena_chunk_t) + (sizeof(arena_chunk_map_t) - * (chunk_npages - 1)); + header_size = (unsigned)(sizeof(arena_chunk_t) + + (sizeof(arena_chunk_map_t) * (chunk_npages - 1))); arena_chunk_header_npages = (header_size >> pagesize_2pow); if ((header_size & pagesize_mask) != 0) arena_chunk_header_npages++; @@ -3496,6 +3611,7 @@ malloc_init_hard(void) #ifdef MALLOC_STATS huge_nmalloc = 0; huge_ndalloc = 0; + huge_nralloc = 0; huge_allocated = 0; #endif RB_INIT(&old_chunks); @@ -3538,7 +3654,7 @@ malloc_init_hard(void) * can handle. */ if (narenas * sizeof(arena_t *) > chunksize) - narenas = chunksize / sizeof(arena_t *); + narenas = (unsigned)(chunksize / sizeof(arena_t *)); } else if (opt_narenas_lshift < 0) { if ((narenas << opt_narenas_lshift) < narenas) narenas <<= opt_narenas_lshift; @@ -3610,7 +3726,7 @@ malloc(size_t size) RETURN: if (ret == NULL) { if (opt_xmalloc) { - _malloc_message(_getprogname(), + _malloc_message(getprogname(), ": (malloc) Error in malloc(): out of memory\n", "", ""); abort(); @@ -3622,9 +3738,6 @@ RETURN: return (ret); } -/* XXXAD */ -int posix_memalign(void **memptr, size_t alignment, size_t size); - int posix_memalign(void **memptr, size_t alignment, size_t size) { @@ -3638,7 +3751,7 @@ posix_memalign(void **memptr, size_t ali if (((alignment - 1) & alignment) != 0 || alignment < sizeof(void *)) { if (opt_xmalloc) { - _malloc_message(_getprogname(), + _malloc_message(getprogname(), ": (malloc) Error in posix_memalign(): " "invalid alignment\n", "", ""); abort(); @@ -3653,7 +3766,7 @@ posix_memalign(void **memptr, size_t ali if (result == NULL) { if (opt_xmalloc) { - _malloc_message(_getprogname(), + _malloc_message(getprogname(), ": (malloc) Error in posix_memalign(): out of memory\n", "", ""); abort(); @@ -3708,7 +3821,7 @@ calloc(size_t num, size_t size) RETURN: if (ret == NULL) { if (opt_xmalloc) { - _malloc_message(_getprogname(), + _malloc_message(getprogname(), ": (malloc) Error in calloc(): out of memory\n", "", ""); abort(); @@ -3743,7 +3856,7 @@ realloc(void *ptr, size_t size) if (ret == NULL) { if (opt_xmalloc) { - _malloc_message(_getprogname(), + _malloc_message(getprogname(), ": (malloc) Error in realloc(): out of " "memory\n", "", ""); abort(); @@ -3758,7 +3871,7 @@ realloc(void *ptr, size_t size) if (ret == NULL) { if (opt_xmalloc) { - _malloc_message(_getprogname(), + _malloc_message(getprogname(), ": (malloc) Error in realloc(): out of " "memory\n", "", ""); abort(); @@ -3825,7 +3938,6 @@ _malloc_prefork(void) if (arenas[i] != NULL) malloc_mutex_lock(&arenas[i]->mtx); } - malloc_mutex_unlock(&arenas_mtx); malloc_mutex_lock(&base_mtx); @@ -3843,7 +3955,6 @@ _malloc_postfork(void) malloc_mutex_unlock(&base_mtx); - malloc_mutex_lock(&arenas_mtx); for (i = 0; i < narenas; i++) { if (arenas[i] != NULL) malloc_mutex_unlock(&arenas[i]->mtx);
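
The RUN_BFP/RUN_MAX_OVRHD hunk above replaces the old floating-point run-overhead test with integer fixed-point arithmetic: both sides of each comparison are scaled by 2^RUN_BFP, so the implicit binary point sits 12 bits to the left. The following is a minimal standalone C sketch of the equivalent test; the helper name run_overhead_too_high() is illustrative and not part of the tree.

#include <stdbool.h>
#include <stddef.h>

#define RUN_BFP			12		/* implicit binary point, 12 bits */
#define RUN_MAX_OVRHD		0x0000003dU	/* 0x3d / 2^12 ~= 1.5% */
#define RUN_MAX_OVRHD_RELAX	0x00001800U	/* 0x1800 / 2^12 == 1.5 */

/*
 * True when reg0_offset (the run header) wastes more than ~1.5% of
 * run_size, i.e. arena_bin_run_size_calc() should keep trying larger runs.
 * Old code: RUN_MAX_OVRHD > RUN_MAX_OVRHD_RELAX / (double)(reg_size << 3)
 *           && (double)reg0_offset / run_size > RUN_MAX_OVRHD
 */
bool
run_overhead_too_high(size_t reg_size, size_t reg0_offset, size_t run_size)
{
	if (RUN_MAX_OVRHD * (reg_size << 3) <= RUN_MAX_OVRHD_RELAX)
		return false;	/* regions so small the limit is relaxed */
	return (reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * run_size;
}

This keeps malloc free of floating point, while 0x3d/4096 stays within rounding distance of the old 0.015 threshold.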
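
The choose_arena()/choose_arena_hard() rewrite above hands each thread a whole block of ncpus arenas and then indexes that block by the current CPU, so the fast path needs neither a lock nor a modulo. Below is a simplified, self-contained sketch of the scheme; the stubs (thr_curcpu(), the __thread map, the fixed counts) stand in for the libc-internal TLS helpers and locking, and choose_arena_sketch() is not a name from the tree.

#include <stddef.h>

#define NCPUS   4            /* assumed fixed for the sketch */
#define NARENAS (4 * NCPUS)  /* narenas is kept a multiple of ncpus */

typedef struct { int dummy; } arena_t;

static arena_t arena_store[NARENAS];	/* stand-in for arenas_extend() */
static arena_t *arenas[NARENAS];
static unsigned next_arena;		/* always advances in NCPUS-sized steps */
static __thread arena_t **arenas_map;	/* this thread's block of arenas */

static unsigned
thr_curcpu(void)
{
	return 0;	/* stub; the real helper reports the current CPU */
}

arena_t *
choose_arena_sketch(void)
{
	arena_t **map = arenas_map;
	unsigned curcpu = thr_curcpu();

	/* Fast path: this thread already owns a block and the slot exists. */
	if (map != NULL && map[curcpu] != NULL)
		return map[curcpu];

	/*
	 * Slow path (choose_arena_hard): assign the next block of NCPUS
	 * arenas to this thread, creating any that are missing, and advance
	 * next_arena by one whole block.  The real code holds arenas_mtx
	 * here and falls back to arenas[0] if arena creation failed.
	 */
	map = &arenas[next_arena];
	arenas_map = map;
	for (unsigned i = 0; i < NCPUS; i++) {
		if (arenas[next_arena] == NULL)
			arenas[next_arena] = &arena_store[next_arena];
		next_arena = (next_arena + 1) % NARENAS;
	}
	return map[curcpu] != NULL ? map[curcpu] : arenas[0];
}

Because next_arena only moves in multiples of NCPUS and NARENAS is a multiple of NCPUS, map[curcpu] never indexes past the end of arenas[], which is the invariant the asserts in the real choose_arena_hard() check.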