/* frame_malloc.c                  -*-C-*-
 *
 *************************************************************************
 *
 *  @copyright
 *  Copyright (C) 2009-2013, Intel Corporation
 *  All rights reserved.
 *
 *  @copyright
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *    * Neither the name of Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived
 *      from this software without specific prior written permission.
 *
 *  @copyright
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 **************************************************************************/

#include "frame_malloc.h"
#include "bug.h"
#include "local_state.h"
#include "cilk_malloc.h"

#ifndef __VXWORKS__
#include <memory.h>
#endif

/* #define USE_MMAP 1 */
#if USE_MMAP
#define __USE_MISC 1
#include <sys/mman.h>
#include <errno.h>
#endif

// Define to fill the stack frame header with the fill character when pushing
// it on a free list.  Note that this should be #ifdef'd out when checked in!
#ifdef _DEBUG
#define HEADER_FILL_CHAR 0xbf
#endif

// HEADER_FILL_CHAR should not be defined when checked in, so put out a warning
// message if this is a release build
#if defined(NDEBUG) && defined(HEADER_FILL_CHAR)
#pragma message ("Warning: HEADER_FILL_CHAR defined for a release build")
#endif

static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size);

#ifndef _WIN32

const unsigned short __cilkrts_bucket_sizes[FRAME_MALLOC_NBUCKETS] =
{
    64, 128, 256, 512, 1024, 2048
};

#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) __cilkrts_bucket_sizes[bucket]

/* threshold above which we use slow malloc */
#define FRAME_MALLOC_MAX_SIZE 2048

#else // _WIN32

/* Note that this must match the implementation of framesz_to_bucket in
 * asmilator/layout.ml! */
#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) ((size_t)(64 << (bucket)))

/* threshold above which we use slow malloc */
#define FRAME_MALLOC_MAX_SIZE \
    FRAME_MALLOC_BUCKET_TO_SIZE(FRAME_MALLOC_NBUCKETS - 1)

#endif // _WIN32
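
/* Both definitions yield the same geometric sequence of frame sizes --
 * bucket 0 -> 64 bytes, up through bucket 5 -> 2048 bytes -- assuming
 * FRAME_MALLOC_NBUCKETS is 6, as the six-element array above implies. */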

/* utility procedures */
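/* The free lists are intrusive, LIFO, singly-linked lists: each freed
 * frame itself holds the `cdr' link to the next free frame, so the
 * lists cost no memory beyond the freed frames they chain together. */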
static void push(struct free_list **b, struct free_list *p)
{
#ifdef HEADER_FILL_CHAR
    memset(p, HEADER_FILL_CHAR, FRAME_MALLOC_BUCKET_TO_SIZE(0));
#endif
    /* cons! onto free list */
    p->cdr = *b;
    *b = p;
}

static struct free_list *pop(struct free_list **b)
{
    struct free_list *p = *b;
    if (p)
        *b = p->cdr;
    return p;
}

/*************************************************************
  global allocator:
*************************************************************/

/* request slightly less than 2^K from the OS, which after malloc
   overhead and alignment should end up filling each VM page almost
   completely.  128 is a guess of the total malloc overhead and cache
   line alignment */
#define FRAME_MALLOC_CHUNK (32 * 1024 - 128)
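/* i.e., each chunk requests 32768 - 128 = 32640 bytes from the OS. */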

/** Implements linked list of frames */
struct pool_cons {
    char *p;               /**< This element of the list */
    struct pool_cons *cdr; /**< Remainder of the list */
};

static void extend_global_pool(global_state_t *g)
{
    /* FIXME: memalign to a cache line? */
    struct pool_cons *c = (struct pool_cons *)__cilkrts_malloc(sizeof(*c));
    g->frame_malloc.pool_begin =
        (char *)__cilkrts_malloc((size_t)FRAME_MALLOC_CHUNK);
    g->frame_malloc.pool_end =
        g->frame_malloc.pool_begin + FRAME_MALLOC_CHUNK;
    g->frame_malloc.allocated_from_os += FRAME_MALLOC_CHUNK;
    c->p = g->frame_malloc.pool_begin;
    c->cdr = g->frame_malloc.pool_list;
    g->frame_malloc.pool_list = c;
}
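
/* Note: global_alloc() and global_free() assume the caller already holds
 * g->frame_malloc.lock; every caller in this file (allocate_batch,
 * gc_bucket, move_bucket_to_global_free_list) takes the lock first. */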

/* the size is already canonicalized at this point */
static struct free_list *global_alloc(global_state_t *g, int bucket)
{
    struct free_list *mem;
    size_t size;

    CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
    size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    g->frame_malloc.allocated_from_global_pool += size;

    if (!(mem = pop(&g->frame_malloc.global_free_list[bucket]))) {
        CILK_ASSERT(g->frame_malloc.pool_begin <= g->frame_malloc.pool_end);
        if (g->frame_malloc.pool_begin + size > g->frame_malloc.pool_end) {
            /* We waste the fragment of pool. */
            g->frame_malloc.wasted +=
                g->frame_malloc.pool_end - g->frame_malloc.pool_begin;
            extend_global_pool(g);
        }
        mem = (struct free_list *)g->frame_malloc.pool_begin;
        g->frame_malloc.pool_begin += size;
    }

    return mem;
}

static void global_free(global_state_t *g, void *mem, int bucket)
{
    size_t size;

    CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
    size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    g->frame_malloc.allocated_from_global_pool -= size;
    push(&g->frame_malloc.global_free_list[bucket], mem);
}

void __cilkrts_frame_malloc_global_init(global_state_t *g)
{
    int i;

    __cilkrts_mutex_init(&g->frame_malloc.lock);
    g->frame_malloc.check_for_leaks = 1;
    g->frame_malloc.pool_list = 0;
    g->frame_malloc.pool_begin = 0;
    g->frame_malloc.pool_end = 0;
    g->frame_malloc.batch_size = 8000;
    g->frame_malloc.potential_limit = 4 * g->frame_malloc.batch_size;
    g->frame_malloc.allocated_from_os = 0;
    g->frame_malloc.allocated_from_global_pool = 0;
    g->frame_malloc.wasted = 0;
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
        g->frame_malloc.global_free_list[i] = 0;
}
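
/* With these defaults, a worker refills a bucket in batches of at least
 * 8000 bytes (see allocate_batch below) and starts returning memory to
 * the global pool once a bucket's potential exceeds 4 * 8000 = 32000
 * bytes (see __cilkrts_frame_free). */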

// Counts how many bytes are in the global free list.
static size_t count_memory_in_global_list(global_state_t *g)
{
    // Count the memory remaining in the global free list.
    size_t size_remaining_in_global_list = 0;
    int i;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        struct free_list *p;
        size_t size_in_bucket = 0;
        p = g->frame_malloc.global_free_list[i];

        while (p) {
            size_in_bucket += FRAME_MALLOC_BUCKET_TO_SIZE(i);
            p = p->cdr;
        }
        size_remaining_in_global_list += size_in_bucket;
    }
    return size_remaining_in_global_list;
}

void __cilkrts_frame_malloc_global_cleanup(global_state_t *g)
{
    struct pool_cons *c;

    if (g->frame_malloc.check_for_leaks) {
        size_t memory_in_global_list = count_memory_in_global_list(g);

        // TBD: This check is weak.  Short of memory corruption,
        // I don't see how we could have more memory in the free list
        // than was allocated from the OS.
        // Ideally, we should count the memory in the global free list
        // and check that we have it all.  But I believe the runtime
        // itself also uses some memory, which is not being tracked.
        if (memory_in_global_list > g->frame_malloc.allocated_from_os) {
            __cilkrts_bug("\nError. The Cilk runtime data structures may have been corrupted.\n");
        }
    }

    while ((c = g->frame_malloc.pool_list)) {
        g->frame_malloc.pool_list = c->cdr;
        __cilkrts_free(c->p);
        __cilkrts_free(c);
    }

    __cilkrts_mutex_destroy(0, &g->frame_malloc.lock);

    // Check that all the memory moved from the global pool into
    // workers has been returned to the global pool.
    if (g->frame_malloc.check_for_leaks
        && (g->frame_malloc.allocated_from_global_pool != 0))
    {
        __cilkrts_bug("\n"
                      "---------------------------" "\n"
                      "  MEMORY LEAK DETECTED!!!  " "\n"
                      "---------------------------" "\n"
                      "\n");
    }
}

/*************************************************************
  per-worker allocator
*************************************************************/

/* allocate a batch of frames of size SIZE from the global pool and
   store them in the worker's free list */
static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size)
{
    global_state_t *g = w->g;

    __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
#if USE_MMAP
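        /* Debugging path: give each frame its own 12 KB mapping laid out
         * as [guard page][data page][guard page], with the frame placed
         * at the top of the data page.  Running off either end of the
         * frame then faults immediately. */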
        char *p = mmap(0, 12288, PROT_READ|PROT_WRITE,
                       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            __cilkrts_bug("mmap failed %d", errno);
        assert(size < 4096);
        assert(p != MAP_FAILED);
        mprotect(p, 4096, PROT_NONE);
        mprotect(p + 8192, 4096, PROT_NONE);
        w->l->bucket_potential[bucket] += size;
        push(&w->l->free_list[bucket], (struct free_list *)(p + 8192 - size));
#else
        size_t bytes_allocated = 0;
        do {
            w->l->bucket_potential[bucket] += size;
            bytes_allocated += size;
            push(&w->l->free_list[bucket], global_alloc(g, bucket));
        } while (bytes_allocated < g->frame_malloc.batch_size);
#endif
    } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
}

static void gc_bucket(__cilkrts_worker *w, int bucket, size_t size)
{
    struct free_list *p, *q;
    global_state_t *g = w->g;
    size_t pot = w->l->bucket_potential[bucket];
    size_t newpot;

    /* Keep up to POT/2 elements in the free list.  The cost of
       counting up to POT/2 is amortized against POT. */
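    /* For example: with pot == 32768 and 2048-byte frames, the walk below
       stops after counting 16384 bytes (8 frames), and the remainder of
       the list is handed back to the global pool. */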
    newpot = 0;
    for (newpot = 0, p = w->l->free_list[bucket]; p && 2 * newpot < pot;
         p = p->cdr, newpot += size)
        ;
    w->l->bucket_potential[bucket] = newpot;

    if (p) {
        /* free the rest of the list.  The cost of grabbing the lock
           is amortized against POT/2; the cost of traversing the rest
           of the list is amortized against the free operation that
           puts the element on the list. */
        __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
            while ((q = pop(&p->cdr)))
#if USE_MMAP
                munmap((char *)q + size - 8192, 12288);
#else
                global_free(g, q, bucket);
#endif
        } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
    }
}

// Free all the memory in this bucket for the specified worker,
// returning it to the global pool's free list.
static void move_bucket_to_global_free_list(__cilkrts_worker *w,
                                            int bucket)
{
    struct free_list *p, *q;
    global_state_t *g = w->g;
    p = w->l->free_list[bucket];

    if (p) {
        __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
            while ((q = pop(&p))) {
#if USE_MMAP
                size_t size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
                munmap((char *)q + size - 8192, 12288);
#else
                global_free(g, q, bucket);
#endif
            }
        } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
    }

    // I'm not sure this does anything useful now, since
    // the worker is about to be destroyed.  But why not?
    w->l->bucket_potential[bucket] = 0;
}

static int bucket_of_size(size_t size)
{
    int i;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
        if (size <= FRAME_MALLOC_BUCKET_TO_SIZE(i))
            return i;

    CILK_ASSERT(0 /* can't happen */);
    return -1;
}

size_t __cilkrts_frame_malloc_roundup(size_t size)
{
    if (size > FRAME_MALLOC_MAX_SIZE) {
        /* nothing, leave it alone */
    } else {
        int bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    }
    return size;
}

size_t __cilkrts_size_of_bucket(int bucket)
{
    CILK_ASSERT(bucket >= 0 && bucket < FRAME_MALLOC_NBUCKETS);
    return FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
}
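
/* Allocate a frame of at least SIZE bytes from W's free list for the
 * matching bucket, refilling from the global pool when the bucket is
 * empty.  Requests that are too large, or made with no worker bound,
 * fall through to the general-purpose __cilkrts_malloc(). */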
void *__cilkrts_frame_malloc(__cilkrts_worker *w, size_t size)
{
    int bucket;
    void *mem;

    /* if too large, or if no worker, fall back to __cilkrts_malloc() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_ALLOC_LARGE);
        return __cilkrts_malloc(size);
    }

    START_INTERVAL(w, INTERVAL_FRAME_ALLOC); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);

        while (!(mem = pop(&w->l->free_list[bucket]))) {
            /* get a batch of frames from the global pool */
            START_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL) {
                allocate_batch(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC);

    return mem;
}

void __cilkrts_frame_free(__cilkrts_worker *w, void *p0, size_t size)
{
    int bucket;
    struct free_list *p = (struct free_list *)p0;

    /* if too large, or if no worker, fall back to __cilkrts_free() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_FREE_LARGE);
        __cilkrts_free(p);
        return;
    }

#if CILK_LIB_DEBUG
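    /* Dereference w through a volatile lvalue so the load cannot be
     * optimized away; in debug builds this faults early if w is a bad
     * pointer. */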
    *(volatile long *)w;
#endif

    START_INTERVAL(w, INTERVAL_FRAME_FREE); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
        w->l->bucket_potential[bucket] += size;
        push(&w->l->free_list[bucket], p);
        if (w->l->bucket_potential[bucket] >
            w->g->frame_malloc.potential_limit) {
            START_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL) {
                gc_bucket(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_FREE);
}

void __cilkrts_frame_malloc_per_worker_init(__cilkrts_worker *w)
{
    int i;
    local_state *l = w->l;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        l->free_list[i] = 0;
        l->bucket_potential[i] = 0;
    }
}

void __cilkrts_frame_malloc_per_worker_cleanup(__cilkrts_worker *w)
{
    int i;

    // Move memory to the global pool.  This operation ensures the memory
    // does not become unreachable / leak when the worker is destroyed.
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        move_bucket_to_global_free_list(w, i);
    }
}

/*
  Local Variables: **
  c-file-style:"bsd" **
  c-basic-offset:4 **
  indent-tabs-mode:nil **
  End: **
*/