/* frame_malloc.c                  -*-C-*-
 *
 *************************************************************************
 *
 *  @copyright
 *  Copyright (C) 2009-2013, Intel Corporation
 *  All rights reserved.
 *
 *  @copyright
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *    * Neither the name of Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived
 *      from this software without specific prior written permission.
 *
 *  @copyright
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 **************************************************************************/
#include "frame_malloc.h"
#include "bug.h"
#include "local_state.h"
#include "cilk_malloc.h"

#ifndef __VXWORKS__
#include <memory.h>
#endif

/* #define USE_MMAP 1 */
#if USE_MMAP
#define __USE_MISC 1
#include <sys/mman.h>
#include <errno.h>
#endif

// Define to fill the stack frame header with the fill character when pushing
// it onto a free list.  Note that this should be #ifdef'd out when checked in!
#ifdef _DEBUG
#define HEADER_FILL_CHAR 0xbf
#endif

// HEADER_FILL_CHAR should not be defined when checked in, so emit a warning
// message if this is a release build
#if defined(NDEBUG) && defined (HEADER_FILL_CHAR)
#pragma message ("Warning: HEADER_FILL_CHAR defined for a release build")
#endif
static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size);

#ifndef _WIN32
const unsigned short __cilkrts_bucket_sizes[FRAME_MALLOC_NBUCKETS] =
{
    64, 128, 256, 512, 1024, 2048
};

#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) __cilkrts_bucket_sizes[bucket]

/* threshold above which we use slow malloc */
#define FRAME_MALLOC_MAX_SIZE 2048

#else // _WIN32

/* Note that this must match the implementation of framesz_to_bucket in
 * asmilator/layout.ml! */
#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) ((size_t)(64 << (bucket)))

/* threshold above which we use slow malloc */
#define FRAME_MALLOC_MAX_SIZE \
    FRAME_MALLOC_BUCKET_TO_SIZE(FRAME_MALLOC_NBUCKETS - 1)

#endif // _WIN32

/* utility procedures */
static void push(struct free_list **b, struct free_list *p)
{
#ifdef HEADER_FILL_CHAR
    /* Fill the frame header with a known pattern so stale reads stand out. */
    memset(p, HEADER_FILL_CHAR, FRAME_MALLOC_BUCKET_TO_SIZE(0));
#endif
    /* cons! onto free list */
    p->cdr = *b;
    *b = p;
}

static struct free_list *pop(struct free_list **b)
{
    struct free_list *p = *b;
    if (p)
        *b = p->cdr;
    return p;
}
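
/* A minimal, compiled-out sketch of the LIFO discipline that push() and
   pop() implement: the most recently freed frame is reused first, which
   keeps hot frames cache-warm.  The names below (free_list_demo, a, b)
   are hypothetical and not part of the runtime. */
#if 0
#include <assert.h>
static void free_list_demo(void)
{
    /* Pad each node to bucket-0 size (64 bytes) so the HEADER_FILL_CHAR
       memset in push() stays in bounds in debug builds. */
    struct { struct free_list fl; char pad[64]; } a, b;
    struct free_list *head = 0;

    push(&head, &a.fl);          /* head -> a */
    push(&head, &b.fl);          /* head -> b -> a */
    assert(pop(&head) == &b.fl); /* LIFO: the last frame freed comes back first */
    assert(pop(&head) == &a.fl);
    assert(pop(&head) == 0);     /* an empty list yields NULL */
}
#endif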
/*************************************************************
  global allocator:
*************************************************************/

/* Request slightly less than 2^K bytes from the OS, which after malloc
   overhead and alignment should end up filling each VM page almost
   completely.  128 is a guess at the total malloc overhead plus cache
   line alignment. */
#define FRAME_MALLOC_CHUNK (32 * 1024 - 128)

/** Implements linked list of frames */
struct pool_cons {
    char *p;               /**< This element of the list */
    struct pool_cons *cdr; /**< Remainder of the list */
};

static void extend_global_pool(global_state_t *g)
{
    /* FIXME: memalign to a cache line? */
    struct pool_cons *c = (struct pool_cons *)__cilkrts_malloc(sizeof(*c));
    g->frame_malloc.pool_begin =
        (char *)__cilkrts_malloc((size_t)FRAME_MALLOC_CHUNK);
    g->frame_malloc.pool_end =
        g->frame_malloc.pool_begin + FRAME_MALLOC_CHUNK;
    g->frame_malloc.allocated_from_os += FRAME_MALLOC_CHUNK;
    c->p = g->frame_malloc.pool_begin;
    c->cdr = g->frame_malloc.pool_list;
    g->frame_malloc.pool_list = c;
}
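
/* Worked example of the chunk arithmetic, derived from the constants above
   rather than any additional source: FRAME_MALLOC_CHUNK is
   32 * 1024 - 128 = 32640 bytes.  One chunk holds exactly 510 bucket-0
   frames (64 bytes each), or 15 bucket-5 frames (2048 bytes each) with
   1920 bytes left over; a tail too small for the next request is counted
   in frame_malloc.wasted by global_alloc() below. */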
/* the size is already canonicalized at this point */
static struct free_list *global_alloc(global_state_t *g, int bucket)
{
    struct free_list *mem;
    size_t size;

    CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
    size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    g->frame_malloc.allocated_from_global_pool += size;

    if (!(mem = pop(&g->frame_malloc.global_free_list[bucket]))) {
        CILK_ASSERT(g->frame_malloc.pool_begin <= g->frame_malloc.pool_end);
        if (g->frame_malloc.pool_begin + size > g->frame_malloc.pool_end) {
            /* We waste the fragment of pool. */
            g->frame_malloc.wasted +=
                g->frame_malloc.pool_end - g->frame_malloc.pool_begin;
            extend_global_pool(g);
        }
        mem = (struct free_list *)g->frame_malloc.pool_begin;
        g->frame_malloc.pool_begin += size;
    }
    return mem;
}
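
/* Allocation order in global_alloc(): first try the per-bucket global free
   list; on a miss, bump-allocate from the current chunk, extending the pool
   when fewer than `size' bytes remain.  Note the asymmetry: frames released
   through global_free() below return to the free list, never to the bump
   region, so pool_begin only ever advances. */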
static void global_free(global_state_t *g, void *mem, int bucket)
{
    size_t size;

    CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
    size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    g->frame_malloc.allocated_from_global_pool -= size;
    push(&g->frame_malloc.global_free_list[bucket], mem);
}

void __cilkrts_frame_malloc_global_init(global_state_t *g)
{
    int i;

    __cilkrts_mutex_init(&g->frame_malloc.lock);
    g->frame_malloc.check_for_leaks = 1;
    g->frame_malloc.pool_list = 0;
    g->frame_malloc.pool_begin = 0;
    g->frame_malloc.pool_end = 0;
    g->frame_malloc.batch_size = 8000;
    g->frame_malloc.potential_limit = 4 * g->frame_malloc.batch_size;
    g->frame_malloc.allocated_from_os = 0;
    g->frame_malloc.allocated_from_global_pool = 0;
    g->frame_malloc.wasted = 0;
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
        g->frame_malloc.global_free_list[i] = 0;
}
// Counts how many bytes remain in the global free list.
static size_t count_memory_in_global_list(global_state_t *g)
{
    size_t size_remaining_in_global_list = 0;
    int i;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        struct free_list *p;
        size_t size_in_bucket = 0;
        p = g->frame_malloc.global_free_list[i];

        while (p) {
            size_in_bucket += FRAME_MALLOC_BUCKET_TO_SIZE(i);
            p = p->cdr;
        }
        size_remaining_in_global_list += size_in_bucket;
    }
    return size_remaining_in_global_list;
}
void __cilkrts_frame_malloc_global_cleanup(global_state_t *g)
{
    struct pool_cons *c;

    if (g->frame_malloc.check_for_leaks) {
        size_t memory_in_global_list = count_memory_in_global_list(g);

        // TBD: This check is weak.  Short of memory corruption, the free
        // list can never hold more memory than was allocated from the OS.
        // Ideally we would count the memory in the global free list and
        // check that we have it all, but the runtime itself also uses some
        // memory that is not tracked here.
        if (memory_in_global_list > g->frame_malloc.allocated_from_os) {
            __cilkrts_bug("\nError. The Cilk runtime data structures may have been corrupted.\n");
        }
    }

    while ((c = g->frame_malloc.pool_list)) {
        g->frame_malloc.pool_list = c->cdr;
        __cilkrts_free(c->p);
        __cilkrts_free(c);
    }

    __cilkrts_mutex_destroy(0, &g->frame_malloc.lock);

    // Check that all the memory moved from the global pool into
    // workers has been returned to the global pool.
    if (g->frame_malloc.check_for_leaks
        && (g->frame_malloc.allocated_from_global_pool != 0))
    {
        __cilkrts_bug("\n"
                      "---------------------------" "\n"
                      "  MEMORY LEAK DETECTED!!!  " "\n"
                      "---------------------------" "\n"
                      "\n"
            );
    }
}
/*************************************************************
  per-worker allocator
*************************************************************/

/* Allocate a batch of frames of size SIZE from the global pool and
   store them in the worker's free list */
static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size)
{
    global_state_t *g = w->g;

    __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
#if USE_MMAP
        /* Debugging path: map three pages and revoke access to the first
           and last, so that a frame placed flush against the upper guard
           page faults immediately on overrun. */
        char *p = mmap(0, 12288, PROT_READ|PROT_WRITE,
                       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            __cilkrts_bug("mmap failed %d", errno);
        assert(size < 4096);
        assert(p != MAP_FAILED);
        mprotect(p, 4096, PROT_NONE);        /* guard page below */
        mprotect(p + 8192, 4096, PROT_NONE); /* guard page above */
        w->l->bucket_potential[bucket] += size;
        push(&w->l->free_list[bucket], (struct free_list *)(p + 8192 - size));
#else
        size_t bytes_allocated = 0;
        do {
            w->l->bucket_potential[bucket] += size;
            bytes_allocated += size;
            push(&w->l->free_list[bucket], global_alloc(g, bucket));
        } while (bytes_allocated < g->frame_malloc.batch_size);
#endif
    } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
}
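
/* Batch arithmetic, derived from the defaults set in
   __cilkrts_frame_malloc_global_init() (batch_size = 8000): one trip to
   the global pool hands a worker 8000 / 64 = 125 bucket-0 frames, or
   4 bucket-5 frames (4 * 2048 = 8192 >= 8000).  Taking many frames per
   lock acquisition amortizes the cost of g->frame_malloc.lock across
   the whole batch. */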
static void gc_bucket(__cilkrts_worker *w, int bucket, size_t size)
{
    struct free_list *p, *q;
    global_state_t *g = w->g;
    size_t pot = w->l->bucket_potential[bucket];
    size_t newpot;

    /* Keep up to POT/2 elements in the free list.  The cost of
       counting up to POT/2 is amortized against POT. */
    for (newpot = 0, p = w->l->free_list[bucket]; p && 2 * newpot < pot;
         p = p->cdr, newpot += size)
        ;
    w->l->bucket_potential[bucket] = newpot;

    if (p) {
        /* Free the rest of the list.  The cost of grabbing the lock
           is amortized against POT/2; the cost of traversing the rest
           of the list is amortized against the free operation that
           put each element on the list. */
        __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
            while ((q = pop(&p->cdr)))
#if USE_MMAP
                munmap((char *)q + size - 8192, 12288);
#else
                global_free(g, q, bucket);
#endif
        } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
    }
}
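
/* Worked example of the GC policy, using the default potential_limit of
   4 * 8000 = 32000 bytes: once a bucket-0 list's potential exceeds 32000,
   the list is scanned until newpot reaches pot/2, i.e. roughly the first
   250 64-byte frames are kept, and everything beyond them is returned to
   the global pool in a single critical section. */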
// Free all the memory in this bucket for the specified worker,
// returning it to the global pool's free list.
static void move_bucket_to_global_free_list(__cilkrts_worker *w,
                                            int bucket)
{
    struct free_list *p, *q;
    global_state_t *g = w->g;
    p = w->l->free_list[bucket];

    if (p) {
        __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
            while ((q = pop(&p))) {
#if USE_MMAP
                size_t size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
                munmap((char *)q + size - 8192, 12288);
#else
                global_free(g, q, bucket);
#endif
            }
        } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
    }

    // I'm not sure this does anything useful now, since
    // the worker is about to be destroyed.  But why not?
    w->l->bucket_potential[bucket] = 0;
}
static int bucket_of_size(size_t size)
{
    int i;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
        if (size <= FRAME_MALLOC_BUCKET_TO_SIZE(i))
            return i;

    CILK_ASSERT(0 /* can't happen */);
    return -1;
}

size_t __cilkrts_frame_malloc_roundup(size_t size)
{
    if (size > FRAME_MALLOC_MAX_SIZE) {
        /* nothing, leave it alone */
    } else {
        int bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    }
    return size;
}
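
/* Compiled-out sketch of the size-to-bucket mapping implied by the bucket
   table above (64, 128, 256, 512, 1024, 2048); illustrative only, and the
   function name bucket_demo is hypothetical. */
#if 0
#include <assert.h>
static void bucket_demo(void)
{
    assert(bucket_of_size(1) == 0);                       /* smallest bucket */
    assert(bucket_of_size(64) == 0);                      /* exact fit */
    assert(bucket_of_size(65) == 1);                      /* rounds up to 128 */
    assert(__cilkrts_frame_malloc_roundup(100) == 128);
    assert(__cilkrts_frame_malloc_roundup(3000) == 3000); /* > max: unchanged */
}
#endif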
size_t __cilkrts_size_of_bucket(int bucket)
{
    CILK_ASSERT(bucket >= 0 && bucket < FRAME_MALLOC_NBUCKETS);
    return FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
}

void *__cilkrts_frame_malloc(__cilkrts_worker *w, size_t size)
{
    int bucket;
    void *mem;

    /* if too large, or if no worker, fall back to __cilkrts_malloc() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_ALLOC_LARGE);
        return __cilkrts_malloc(size);
    }

    START_INTERVAL(w, INTERVAL_FRAME_ALLOC); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);

        while (!(mem = pop(&w->l->free_list[bucket]))) {
            /* get a batch of frames from the global pool */
            START_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL) {
                allocate_batch(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC);

    return mem;
}
void __cilkrts_frame_free(__cilkrts_worker *w, void *p0, size_t size)
{
    int bucket;
    struct free_list *p = (struct free_list *)p0;

    /* if too large, or if no worker, fall back to __cilkrts_free() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_FREE_LARGE);
        __cilkrts_free(p);
        return;
    }

#if CILK_LIB_DEBUG
    /* Force a read through the worker pointer so that an invalid
       pointer faults here rather than somewhere less obvious. */
    *(volatile long *)w;
#endif

    START_INTERVAL(w, INTERVAL_FRAME_FREE); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
        w->l->bucket_potential[bucket] += size;
        push(&w->l->free_list[bucket], p);

        if (w->l->bucket_potential[bucket] >
            w->g->frame_malloc.potential_limit) {
            START_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL) {
                gc_bucket(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_FREE);
}
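
/* Lifecycle summary: frames cycle worker-locally between
   __cilkrts_frame_malloc() and __cilkrts_frame_free() with no locking.
   The global lock is taken only when a worker's free list runs dry
   (allocate_batch) or its potential exceeds potential_limit (gc_bucket),
   so in steady state allocation is a plain list pop and deallocation a
   plain list push. */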
void __cilkrts_frame_malloc_per_worker_init(__cilkrts_worker *w)
{
    int i;
    local_state *l = w->l;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        l->free_list[i] = 0;
        l->bucket_potential[i] = 0;
    }
}

void __cilkrts_frame_malloc_per_worker_cleanup(__cilkrts_worker *w)
{
    int i;

    // Move memory to the global pool.  This operation
    // ensures the memory does not become unreachable / leak
    // when the worker is destroyed.
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        move_bucket_to_global_free_list(w, i);
    }
}
/*
  Local Variables: **
  c-file-style:"bsd" **
  c-basic-offset:4 **
  indent-tabs-mode:nil **
  End: **
*/