malloc.goc

  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // See malloc.h for overview.
  5. //
  6. // TODO(rsc): double-check stats.
  7. package runtime
  8. #include <stddef.h>
  9. #include <errno.h>
  10. #include <stdlib.h>
  11. #include "go-alloc.h"
  12. #include "runtime.h"
  13. #include "arch.h"
  14. #include "malloc.h"
  15. #include "interface.h"
  16. #include "go-type.h"
  17. // Map gccgo field names to gc field names.
  18. // Eface aka __go_empty_interface.
  19. #define type __type_descriptor
  20. // Type aka __go_type_descriptor
  21. #define kind __code
  22. #define string __reflection
  23. #define KindPtr GO_PTR
  24. #define KindNoPointers GO_NO_POINTERS
  25. #define kindMask GO_CODE_MASK
  26. // GCCGO SPECIFIC CHANGE
  27. //
  28. // There is a long comment in runtime_mallocinit about where to put the heap
  29. // on a 64-bit system. It makes assumptions that are not valid on linux/arm64
  30. // -- it assumes user space can choose the lower 47 bits of a pointer, but on
  31. // linux/arm64 we can only choose the lower 39 bits. This means the heap is
  32. // roughly a quarter of the available address space and we cannot choose a bit
  33. // pattern that all pointers will have -- luckily the GC is mostly precise
  34. // these days so this doesn't matter all that much. The kernel (as of 3.13)
  35. // will allocate address space starting either down from 0x7fffffffff or up
  36. // from 0x2000000000, so we put the heap roughly in the middle of these two
  37. // addresses to minimize the chance that a non-heap allocation will get in the
  38. // way of the heap.
  39. //
  40. // This all means that there isn't much point in trying 256 different
  41. // locations for the heap on such systems.
  42. #ifdef __aarch64__
  43. #define HeapBase(i) ((void*)(uintptr)(0x40ULL<<32))
  44. #define HeapBaseOptions 1
  45. #else
  46. #define HeapBase(i) ((void*)(uintptr)(i<<40|0x00c0ULL<<32))
  47. #define HeapBaseOptions 0x80
  48. #endif
  49. // END GCCGO SPECIFIC CHANGE
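// For reference (values derived from the macros above, not from the original
// comments): on linux/arm64 the single candidate heap base is 0x4000000000,
// the address the comment above describes as roughly midway between the two
// kernel allocation regions; on other 64-bit targets the candidates are
// 0x00c000000000, 0x01c000000000, ..., 0x7fc000000000 (i = 0..0x7f), i.e. the
// 0x0000XXc000000000 pattern described in runtime_mallocinit below.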
  50. // Mark mheap as 'no pointers'; it does not contain interesting pointers but occupies ~45K.
  51. MHeap runtime_mheap;
  52. MStats mstats;
  53. int32 runtime_checking;
  54. extern MStats mstats; // defined in zruntime_def_$GOOS_$GOARCH.go
  55. extern volatile intgo runtime_MemProfileRate
  56. __asm__ (GOSYM_PREFIX "runtime.MemProfileRate");
  57. static MSpan* largealloc(uint32, uintptr*);
  58. static void profilealloc(void *v, uintptr size);
  59. static void settype(MSpan *s, void *v, uintptr typ);
  60. // Allocate an object of at least size bytes.
  61. // Small objects are allocated from the per-thread cache's free lists.
  62. // Large objects (> 32 kB) are allocated straight from the heap.
  63. // If the block will be freed with runtime_free(), typ must be 0.
  64. void*
  65. runtime_mallocgc(uintptr size, uintptr typ, uint32 flag)
  66. {
  67. M *m;
  68. G *g;
  69. int32 sizeclass;
  70. uintptr tinysize, size1;
  71. intgo rate;
  72. MCache *c;
  73. MSpan *s;
  74. MLink *v, *next;
  75. byte *tiny;
  76. bool incallback;
  77. if(size == 0) {
  78. // All 0-length allocations use this pointer.
  79. // The language does not require the allocations to
  80. // have distinct values.
  81. return &runtime_zerobase;
  82. }
  83. m = runtime_m();
  84. g = runtime_g();
  85. incallback = false;
  86. if(m->mcache == nil && g->ncgo > 0) {
  87. // For gccgo this case can occur when a cgo or SWIG function
  88. // has an interface return type and the function
  89. // returns a non-pointer, so memory allocation occurs
  90. // after syscall.Cgocall but before syscall.CgocallDone.
  91. // We treat it as a callback.
  92. runtime_exitsyscall();
  93. m = runtime_m();
  94. incallback = true;
  95. flag |= FlagNoInvokeGC;
  96. }
  97. if(runtime_gcwaiting() && g != m->g0 && m->locks == 0 && !(flag & FlagNoInvokeGC)) {
  98. runtime_gosched();
  99. m = runtime_m();
  100. }
  101. if(m->mallocing)
  102. runtime_throw("malloc/free - deadlock");
  103. // Disable preemption during settype.
  104. // We can not use m->mallocing for this, because settype calls mallocgc.
  105. m->locks++;
  106. m->mallocing = 1;
  107. if(DebugTypeAtBlockEnd)
  108. size += sizeof(uintptr);
  109. c = m->mcache;
  110. if(!runtime_debug.efence && size <= MaxSmallSize) {
  111. if((flag&(FlagNoScan|FlagNoGC)) == FlagNoScan && size < TinySize) {
  112. // Tiny allocator.
  113. //
  114. // Tiny allocator combines several tiny allocation requests
  115. // into a single memory block. The resulting memory block
  116. // is freed when all subobjects are unreachable. The subobjects
  117. // must be FlagNoScan (don't have pointers), this ensures that
  118. // the amount of potentially wasted memory is bounded.
  119. //
  120. // Size of the memory block used for combining (TinySize) is tunable.
  121. // The current setting is 16 bytes, which bounds the worst-case memory
  122. // wastage at 2x (when all but one of the subobjects are unreachable).
  123. // 8 bytes would result in no wastage at all, but provides fewer
  124. // opportunities for combining.
  125. // 32 bytes provides more opportunities for combining,
  126. // but can lead to 4x worst case wastage.
  127. // The best-case saving is 8x regardless of block size.
  128. //
  129. // Objects obtained from tiny allocator must not be freed explicitly.
  130. // So when an object will be freed explicitly, we ensure that
  131. // its size >= TinySize.
  132. //
  133. // SetFinalizer has a special case for objects potentially coming
  134. // from the tiny allocator; in such a case it allows setting finalizers
  135. // for an inner byte of a memory block.
  136. //
  137. // The main targets of tiny allocator are small strings and
  138. // standalone escaping variables. On a json benchmark
  139. // the allocator reduces number of allocations by ~12% and
  140. // reduces heap size by ~20%.
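// Worked example of the alignment step below (illustrative values): if
// c->tiny points 4 bytes past an 8-byte boundary and c->tinysize == 12, an
// 8-byte request ((size&7) == 0) rounds tiny up by 4 bytes, so
// size1 = 8 + 4 = 12 <= tinysize and the request is served from the existing
// block, leaving c->tinysize == 0.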
  141. tinysize = c->tinysize;
  142. if(size <= tinysize) {
  143. tiny = c->tiny;
  144. // Align tiny pointer for required (conservative) alignment.
  145. if((size&7) == 0)
  146. tiny = (byte*)ROUND((uintptr)tiny, 8);
  147. else if((size&3) == 0)
  148. tiny = (byte*)ROUND((uintptr)tiny, 4);
  149. else if((size&1) == 0)
  150. tiny = (byte*)ROUND((uintptr)tiny, 2);
  151. size1 = size + (tiny - c->tiny);
  152. if(size1 <= tinysize) {
  153. // The object fits into existing tiny block.
  154. v = (MLink*)tiny;
  155. c->tiny += size1;
  156. c->tinysize -= size1;
  157. m->mallocing = 0;
  158. m->locks--;
  159. if(incallback)
  160. runtime_entersyscall();
  161. return v;
  162. }
  163. }
  164. // Allocate a new TinySize block.
  165. s = c->alloc[TinySizeClass];
  166. if(s->freelist == nil)
  167. s = runtime_MCache_Refill(c, TinySizeClass);
  168. v = s->freelist;
  169. next = v->next;
  170. s->freelist = next;
  171. s->ref++;
  172. if(next != nil) // prefetching nil leads to a DTLB miss
  173. PREFETCH(next);
  174. ((uint64*)v)[0] = 0;
  175. ((uint64*)v)[1] = 0;
  176. // See if we need to replace the existing tiny block with the new one
  177. // based on amount of remaining free space.
  178. if(TinySize-size > tinysize) {
  179. c->tiny = (byte*)v + size;
  180. c->tinysize = TinySize - size;
  181. }
  182. size = TinySize;
  183. goto done;
  184. }
  185. // Allocate from mcache free lists.
  186. // Inlined version of SizeToClass().
  187. if(size <= 1024-8)
  188. sizeclass = runtime_size_to_class8[(size+7)>>3];
  189. else
  190. sizeclass = runtime_size_to_class128[(size-1024+127) >> 7];
  191. size = runtime_class_to_size[sizeclass];
  192. s = c->alloc[sizeclass];
  193. if(s->freelist == nil)
  194. s = runtime_MCache_Refill(c, sizeclass);
  195. v = s->freelist;
  196. next = v->next;
  197. s->freelist = next;
  198. s->ref++;
  199. if(next != nil) // prefetching nil leads to a DTLB miss
  200. PREFETCH(next);
  201. if(!(flag & FlagNoZero)) {
  202. v->next = nil;
  203. // block is zeroed iff second word is zero ...
  204. if(size > 2*sizeof(uintptr) && ((uintptr*)v)[1] != 0)
  205. runtime_memclr((byte*)v, size);
  206. }
  207. done:
  208. c->local_cachealloc += size;
  209. } else {
  210. // Allocate directly from heap.
  211. s = largealloc(flag, &size);
  212. v = (void*)(s->start << PageShift);
  213. }
  214. if(flag & FlagNoGC)
  215. runtime_marknogc(v);
  216. else if(!(flag & FlagNoScan))
  217. runtime_markscan(v);
  218. if(DebugTypeAtBlockEnd)
  219. *(uintptr*)((uintptr)v+size-sizeof(uintptr)) = typ;
  220. m->mallocing = 0;
  221. // TODO: save type even if FlagNoScan? Potentially expensive but might help
  222. // heap profiling/tracing.
  223. if(UseSpanType && !(flag & FlagNoScan) && typ != 0)
  224. settype(s, v, typ);
  225. if(runtime_debug.allocfreetrace)
  226. runtime_tracealloc(v, size, typ);
  227. if(!(flag & FlagNoProfiling) && (rate = runtime_MemProfileRate) > 0) {
  228. if(size < (uintptr)rate && size < (uintptr)(uint32)c->next_sample)
  229. c->next_sample -= size;
  230. else
  231. profilealloc(v, size);
  232. }
  233. m->locks--;
  234. if(!(flag & FlagNoInvokeGC) && mstats.heap_alloc >= mstats.next_gc)
  235. runtime_gc(0);
  236. if(incallback)
  237. runtime_entersyscall();
  238. return v;
  239. }
  240. static MSpan*
  241. largealloc(uint32 flag, uintptr *sizep)
  242. {
  243. uintptr npages, size;
  244. MSpan *s;
  245. void *v;
  246. // Allocate directly from heap.
  247. size = *sizep;
  248. if(size + PageSize < size)
  249. runtime_throw("out of memory");
  250. npages = size >> PageShift;
  251. if((size & PageMask) != 0)
  252. npages++;
  253. s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1, !(flag & FlagNoZero));
  254. if(s == nil)
  255. runtime_throw("out of memory");
  256. s->limit = (byte*)(s->start<<PageShift) + size;
  257. *sizep = npages<<PageShift;
  258. v = (void*)(s->start << PageShift);
  259. // setup for mark sweep
  260. runtime_markspan(v, 0, 0, true);
  261. return s;
  262. }
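// Example of the page rounding above: a request of PageSize+1 bytes yields
// npages = 1 from the shift plus 1 for the nonzero remainder, so two pages
// are allocated and *sizep is reported back as 2<<PageShift.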
  263. static void
  264. profilealloc(void *v, uintptr size)
  265. {
  266. uintptr rate;
  267. int32 next;
  268. MCache *c;
  269. c = runtime_m()->mcache;
  270. rate = runtime_MemProfileRate;
  271. if(size < rate) {
  272. // pick next profile time
  273. // If you change this, also change allocmcache.
  274. if(rate > 0x3fffffff) // make 2*rate not overflow
  275. rate = 0x3fffffff;
  276. next = runtime_fastrand1() % (2*rate);
  277. // Subtract the "remainder" of the current allocation.
  278. // Otherwise objects that are close in size to the sampling rate
  279. // will be under-sampled, because we consistently discard this remainder.
  280. next -= (size - c->next_sample);
  281. if(next < 0)
  282. next = 0;
  283. c->next_sample = next;
  284. }
  285. runtime_MProf_Malloc(v, size);
  286. }
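// Sampling behavior implied by the code above and the check in
// runtime_mallocgc: allocations of at least MemProfileRate bytes are always
// recorded; smaller allocations merely decrement c->next_sample until it is
// exhausted, and the next threshold is drawn uniformly from [0, 2*rate), so
// on average roughly one small allocation is sampled per rate bytes.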
  287. void*
  288. __go_alloc(uintptr size)
  289. {
  290. return runtime_mallocgc(size, 0, FlagNoInvokeGC);
  291. }
  292. // Free the object whose base pointer is v.
  293. void
  294. __go_free(void *v)
  295. {
  296. M *m;
  297. int32 sizeclass;
  298. MSpan *s;
  299. MCache *c;
  300. uintptr size;
  301. if(v == nil)
  302. return;
  303. // If you change this also change mgc0.c:/^sweep,
  304. // which has a copy of the guts of free.
  305. m = runtime_m();
  306. if(m->mallocing)
  307. runtime_throw("malloc/free - deadlock");
  308. m->mallocing = 1;
  309. if(!runtime_mlookup(v, nil, nil, &s)) {
  310. runtime_printf("free %p: not an allocated block\n", v);
  311. runtime_throw("free runtime_mlookup");
  312. }
  313. size = s->elemsize;
  314. sizeclass = s->sizeclass;
  315. // Objects that are smaller than TinySize can be allocated using the tiny allocator;
  316. // if such an object were then combined with an object that has a finalizer, we would crash.
  317. if(size < TinySize)
  318. runtime_throw("freeing too small block");
  319. if(runtime_debug.allocfreetrace)
  320. runtime_tracefree(v, size);
  321. // Ensure that the span is swept.
  322. // If we free into an unswept span, we will corrupt GC bitmaps.
  323. runtime_MSpan_EnsureSwept(s);
  324. if(s->specials != nil)
  325. runtime_freeallspecials(s, v, size);
  326. c = m->mcache;
  327. if(sizeclass == 0) {
  328. // Large object.
  329. s->needzero = 1;
  330. // Must mark v freed before calling unmarkspan and MHeap_Free:
  331. // they might coalesce v into other spans and change the bitmap further.
  332. runtime_markfreed(v);
  333. runtime_unmarkspan(v, 1<<PageShift);
  334. // NOTE(rsc,dvyukov): The original implementation of efence
  335. // in CL 22060046 used SysFree instead of SysFault, so that
  336. // the operating system would eventually give the memory
  337. // back to us again, so that an efence program could run
  338. // longer without running out of memory. Unfortunately,
  339. // calling SysFree here without any kind of adjustment of the
  340. // heap data structures means that when the memory does
  341. // come back to us, we have the wrong metadata for it, either in
  342. // the MSpan structures or in the garbage collection bitmap.
  343. // Using SysFault here means that the program will run out of
  344. // memory fairly quickly in efence mode, but at least it won't
  345. // have mysterious crashes due to confused memory reuse.
  346. // It should be possible to switch back to SysFree if we also
  347. // implement and then call some kind of MHeap_DeleteSpan.
  348. if(runtime_debug.efence)
  349. runtime_SysFault((void*)(s->start<<PageShift), size);
  350. else
  351. runtime_MHeap_Free(&runtime_mheap, s, 1);
  352. c->local_nlargefree++;
  353. c->local_largefree += size;
  354. } else {
  355. // Small object.
  356. if(size > 2*sizeof(uintptr))
  357. ((uintptr*)v)[1] = (uintptr)0xfeedfeedfeedfeedll; // mark as "needs to be zeroed"
  358. else if(size > sizeof(uintptr))
  359. ((uintptr*)v)[1] = 0;
  360. // Must mark v freed before calling MCache_Free:
  361. // it might coalesce v and other blocks into a bigger span
  362. // and change the bitmap further.
  363. c->local_nsmallfree[sizeclass]++;
  364. c->local_cachealloc -= size;
  365. if(c->alloc[sizeclass] == s) {
  366. // We own the span, so we can just add v to the freelist
  367. runtime_markfreed(v);
  368. ((MLink*)v)->next = s->freelist;
  369. s->freelist = v;
  370. s->ref--;
  371. } else {
  372. // Someone else owns this span. Add to free queue.
  373. runtime_MCache_Free(c, v, sizeclass, size);
  374. }
  375. }
  376. m->mallocing = 0;
  377. }
  378. int32
  379. runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
  380. {
  381. M *m;
  382. uintptr n, i;
  383. byte *p;
  384. MSpan *s;
  385. m = runtime_m();
  386. m->mcache->local_nlookup++;
  387. if (sizeof(void*) == 4 && m->mcache->local_nlookup >= (1<<30)) {
  388. // purge cache stats to prevent overflow
  389. runtime_lock(&runtime_mheap);
  390. runtime_purgecachedstats(m->mcache);
  391. runtime_unlock(&runtime_mheap);
  392. }
  393. s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
  394. if(sp)
  395. *sp = s;
  396. if(s == nil) {
  397. runtime_checkfreed(v, 1);
  398. if(base)
  399. *base = nil;
  400. if(size)
  401. *size = 0;
  402. return 0;
  403. }
  404. p = (byte*)((uintptr)s->start<<PageShift);
  405. if(s->sizeclass == 0) {
  406. // Large object.
  407. if(base)
  408. *base = p;
  409. if(size)
  410. *size = s->npages<<PageShift;
  411. return 1;
  412. }
  413. n = s->elemsize;
  414. if(base) {
  415. i = ((byte*)v - p)/n;
  416. *base = p + i*n;
  417. }
  418. if(size)
  419. *size = n;
  420. return 1;
  421. }
  422. void
  423. runtime_purgecachedstats(MCache *c)
  424. {
  425. MHeap *h;
  426. int32 i;
  427. // Protected by either heap or GC lock.
  428. h = &runtime_mheap;
  429. mstats.heap_alloc += c->local_cachealloc;
  430. c->local_cachealloc = 0;
  431. mstats.nlookup += c->local_nlookup;
  432. c->local_nlookup = 0;
  433. h->largefree += c->local_largefree;
  434. c->local_largefree = 0;
  435. h->nlargefree += c->local_nlargefree;
  436. c->local_nlargefree = 0;
  437. for(i=0; i<(int32)nelem(c->local_nsmallfree); i++) {
  438. h->nsmallfree[i] += c->local_nsmallfree[i];
  439. c->local_nsmallfree[i] = 0;
  440. }
  441. }
  442. extern uintptr runtime_sizeof_C_MStats
  443. __asm__ (GOSYM_PREFIX "runtime.Sizeof_C_MStats");
  444. // The size of the trailing by_size array differs between Go and C:
  445. // NumSizeClasses was changed, but the Go struct cannot change, for backward compatibility.
  446. // sizeof_C_MStats is the size of the Go struct as seen from C.
  447. // Initialized in mallocinit because it's defined in go/runtime/mem.go.
  448. #define MaxArena32 (2U<<30)
  449. void
  450. runtime_mallocinit(void)
  451. {
  452. byte *p, *p1;
  453. uintptr arena_size, bitmap_size, spans_size, p_size;
  454. extern byte _end[];
  455. uintptr limit;
  456. uint64 i;
  457. bool reserved;
  458. runtime_sizeof_C_MStats = sizeof(MStats) - (NumSizeClasses - 61) * sizeof(mstats.by_size[0]);
  459. p = nil;
  460. p_size = 0;
  461. arena_size = 0;
  462. bitmap_size = 0;
  463. spans_size = 0;
  464. reserved = false;
  465. // for 64-bit build
  466. USED(p);
  467. USED(p_size);
  468. USED(arena_size);
  469. USED(bitmap_size);
  470. USED(spans_size);
  471. runtime_InitSizes();
  472. if(runtime_class_to_size[TinySizeClass] != TinySize)
  473. runtime_throw("bad TinySizeClass");
  474. // limit = runtime_memlimit();
  475. // See https://code.google.com/p/go/issues/detail?id=5049
  476. // TODO(rsc): Fix after 1.1.
  477. limit = 0;
  478. // Set up the allocation arena, a contiguous area of memory where
  479. // allocated data will be found. The arena begins with a bitmap large
  480. // enough to hold 4 bits per allocated word.
  481. if(sizeof(void*) == 8 && (limit == 0 || limit > (1<<30))) {
  482. // On a 64-bit machine, allocate from a single contiguous reservation.
  483. // 128 GB (MaxMem) should be big enough for now.
  484. //
  485. // The code will work with the reservation at any address, but ask
  486. // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
  487. // Allocating a 128 GB region takes away 37 bits, and the amd64
  488. // doesn't let us choose the top 17 bits, so that leaves the 11 bits
  489. // in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
  490. // that the valid memory addresses will begin with 0x00c0, 0x00c1, ..., 0x00df.
  491. // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
  492. // UTF-8 sequences, and they are otherwise as far away from
  493. // ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
  494. // addresses. An earlier attempt to use 0x11f8 caused out of memory errors
  495. // on OS X during thread allocations. 0x00c0 causes conflicts with
  496. // AddressSanitizer which reserves all memory up to 0x0100.
  497. // These choices are both for debuggability and to reduce the
  498. // odds of the conservative garbage collector not collecting memory
  499. // because some non-pointer block of memory had a bit pattern
  500. // that matched a memory address.
  501. //
  502. // Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
  503. // but it hardly matters: e0 00 is not valid UTF-8 either.
  504. //
  505. // If this fails we fall back to the 32-bit memory mechanism.
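// Arithmetic behind the 136 GB figure above: with 8-byte words the bitmap
// needs 4 bits per word, so bitmap_size = arena_size/(sizeof(void*)*8/4) =
// 128 GB / 16 = 8 GB, plus a comparatively small spans array (one MSpan* per
// page) and one page of rounding slack.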
  506. arena_size = MaxMem;
  507. bitmap_size = arena_size / (sizeof(void*)*8/4);
  508. spans_size = arena_size / PageSize * sizeof(runtime_mheap.spans[0]);
  509. spans_size = ROUND(spans_size, PageSize);
  510. for(i = 0; i < HeapBaseOptions; i++) {
  511. p = HeapBase(i);
  512. p_size = bitmap_size + spans_size + arena_size + PageSize;
  513. p = runtime_SysReserve(p, p_size, &reserved);
  514. if(p != nil)
  515. break;
  516. }
  517. }
  518. if (p == nil) {
  519. // On a 32-bit machine, we can't typically get away
  520. // with a giant virtual address space reservation.
  521. // Instead we map the memory information bitmap
  522. // immediately after the data segment, large enough
  523. // to handle another 2GB of mappings (256 MB),
  524. // along with a reservation for another 512 MB of memory.
  525. // When that gets used up, we'll start asking the kernel
  526. // for any memory anywhere and hope it's in the 2GB
  527. // following the bitmap (presumably the executable begins
  528. // near the bottom of memory, so we'll have to use up
  529. // most of memory before the kernel resorts to giving out
  530. // memory before the beginning of the text segment).
  531. //
  532. // Alternatively we could reserve 512 MB bitmap, enough
  533. // for 4GB of mappings, and then accept any memory the
  534. // kernel threw at us, but normally that's a waste of 512 MB
  535. // of address space, which is probably too much in a 32-bit world.
  536. bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
  537. arena_size = 512<<20;
  538. spans_size = MaxArena32 / PageSize * sizeof(runtime_mheap.spans[0]);
  539. if(limit > 0 && arena_size+bitmap_size+spans_size > limit) {
  540. bitmap_size = (limit / 9) & ~((1<<PageShift) - 1);
  541. arena_size = bitmap_size * 8;
  542. spans_size = arena_size / PageSize * sizeof(runtime_mheap.spans[0]);
  543. }
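// The split above follows from bitmap_size = arena_size/8 on 32-bit systems
// (4 bits per 4-byte word), so arena + bitmap is approximately 9*bitmap_size
// and must fit in limit: take bitmap_size = limit/9 (rounded down to a page
// boundary) and arena_size = 8*bitmap_size; the much smaller spans array is
// then recomputed from the new arena_size.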
  544. spans_size = ROUND(spans_size, PageSize);
  545. // SysReserve treats the address we ask for, end, as a hint,
  546. // not as an absolute requirement. If we ask for the end
  547. // of the data segment but the operating system requires
  548. // a little more space before we can start allocating, it will
  549. // give out a slightly higher pointer. Except QEMU, which
  550. // is buggy, as usual: it won't adjust the pointer upward.
  551. // So adjust it upward a little bit ourselves: 1/4 MB to get
  552. // away from the running binary image and then round up
  553. // to a MB boundary.
  554. p = (byte*)ROUND((uintptr)_end + (1<<18), 1<<20);
  555. p_size = bitmap_size + spans_size + arena_size + PageSize;
  556. p = runtime_SysReserve(p, p_size, &reserved);
  557. if(p == nil)
  558. runtime_throw("runtime: cannot reserve arena virtual address space");
  559. }
  560. // PageSize can be larger than OS definition of page size,
  561. // so SysReserve can give us a PageSize-unaligned pointer.
  562. // To overcome this we ask for PageSize more and round up the pointer.
  563. p1 = (byte*)ROUND((uintptr)p, PageSize);
  564. runtime_mheap.spans = (MSpan**)p1;
  565. runtime_mheap.bitmap = p1 + spans_size;
  566. runtime_mheap.arena_start = p1 + spans_size + bitmap_size;
  567. runtime_mheap.arena_used = runtime_mheap.arena_start;
  568. runtime_mheap.arena_end = p + p_size;
  569. runtime_mheap.arena_reserved = reserved;
  570. if(((uintptr)runtime_mheap.arena_start & (PageSize-1)) != 0)
  571. runtime_throw("misrounded allocation in mallocinit");
  572. // Initialize the rest of the allocator.
  573. runtime_MHeap_Init(&runtime_mheap);
  574. runtime_m()->mcache = runtime_allocmcache();
  575. // See if it works.
  576. runtime_free(runtime_malloc(TinySize));
  577. }
  578. void*
  579. runtime_MHeap_SysAlloc(MHeap *h, uintptr n)
  580. {
  581. byte *p, *p_end;
  582. uintptr p_size;
  583. bool reserved;
  584. if(n > (uintptr)(h->arena_end - h->arena_used)) {
  585. // We are in 32-bit mode, maybe we didn't use all possible address space yet.
  586. // Reserve some more space.
  587. byte *new_end;
  588. p_size = ROUND(n + PageSize, 256<<20);
  589. new_end = h->arena_end + p_size;
  590. if(new_end <= h->arena_start + MaxArena32) {
  591. // TODO: It would be bad if part of the arena
  592. // is reserved and part is not.
  593. p = runtime_SysReserve(h->arena_end, p_size, &reserved);
  594. if(p == h->arena_end) {
  595. h->arena_end = new_end;
  596. h->arena_reserved = reserved;
  597. }
  598. else if(p+p_size <= h->arena_start + MaxArena32) {
  599. // Keep everything page-aligned.
  600. // Our pages are bigger than hardware pages.
  601. h->arena_end = p+p_size;
  602. h->arena_used = p + (-(uintptr)p&(PageSize-1));
  603. h->arena_reserved = reserved;
  604. } else {
  605. uint64 stat;
  606. stat = 0;
  607. runtime_SysFree(p, p_size, &stat);
  608. }
  609. }
  610. }
  611. if(n <= (uintptr)(h->arena_end - h->arena_used)) {
  612. // Keep taking from our reservation.
  613. p = h->arena_used;
  614. runtime_SysMap(p, n, h->arena_reserved, &mstats.heap_sys);
  615. h->arena_used += n;
  616. runtime_MHeap_MapBits(h);
  617. runtime_MHeap_MapSpans(h);
  618. if(((uintptr)p & (PageSize-1)) != 0)
  619. runtime_throw("misrounded allocation in MHeap_SysAlloc");
  620. return p;
  621. }
  622. // If using 64-bit, our reservation is all we have.
  623. if((uintptr)(h->arena_end - h->arena_start) >= MaxArena32)
  624. return nil;
  625. // On 32-bit, once the reservation is gone we can
  626. // try to get memory at a location chosen by the OS
  627. // and hope that it is in the range we allocated bitmap for.
  628. p_size = ROUND(n, PageSize) + PageSize;
  629. p = runtime_SysAlloc(p_size, &mstats.heap_sys);
  630. if(p == nil)
  631. return nil;
  632. if(p < h->arena_start || (uintptr)(p+p_size - h->arena_start) >= MaxArena32) {
  633. runtime_printf("runtime: memory allocated by OS (%p) not in usable range [%p,%p)\n",
  634. p, h->arena_start, h->arena_start+MaxArena32);
  635. runtime_SysFree(p, p_size, &mstats.heap_sys);
  636. return nil;
  637. }
  638. p_end = p + p_size;
  639. p += -(uintptr)p & (PageSize-1);
  640. if(p+n > h->arena_used) {
  641. h->arena_used = p+n;
  642. if(p_end > h->arena_end)
  643. h->arena_end = p_end;
  644. runtime_MHeap_MapBits(h);
  645. runtime_MHeap_MapSpans(h);
  646. }
  647. if(((uintptr)p & (PageSize-1)) != 0)
  648. runtime_throw("misrounded allocation in MHeap_SysAlloc");
  649. return p;
  650. }
  651. static struct
  652. {
  653. Lock;
  654. byte* pos;
  655. byte* end;
  656. } persistent;
  657. enum
  658. {
  659. PersistentAllocChunk = 256<<10,
  660. PersistentAllocMaxBlock = 64<<10, // VM reservation granularity is 64K on Windows
  661. };
  662. // Wrapper around SysAlloc that can allocate small chunks.
  663. // There is no associated free operation.
  664. // Intended for things like function/type/debug-related persistent data.
  665. // If align is 0, uses default align (currently 8).
  666. void*
  667. runtime_persistentalloc(uintptr size, uintptr align, uint64 *stat)
  668. {
  669. byte *p;
  670. if(align != 0) {
  671. if(align&(align-1))
  672. runtime_throw("persistentalloc: align is not a power of 2");
  673. if(align > PageSize)
  674. runtime_throw("persistentalloc: align is too large");
  675. } else
  676. align = 8;
  677. if(size >= PersistentAllocMaxBlock)
  678. return runtime_SysAlloc(size, stat);
  679. runtime_lock(&persistent);
  680. persistent.pos = (byte*)ROUND((uintptr)persistent.pos, align);
  681. if(persistent.pos + size > persistent.end) {
  682. persistent.pos = runtime_SysAlloc(PersistentAllocChunk, &mstats.other_sys);
  683. if(persistent.pos == nil) {
  684. runtime_unlock(&persistent);
  685. runtime_throw("runtime: cannot allocate memory");
  686. }
  687. persistent.end = persistent.pos + PersistentAllocChunk;
  688. }
  689. p = persistent.pos;
  690. persistent.pos += size;
  691. runtime_unlock(&persistent);
  692. if(stat != &mstats.other_sys) {
  693. // reaccount the allocation against the provided stat
  694. runtime_xadd64(stat, size);
  695. runtime_xadd64(&mstats.other_sys, -(uint64)size);
  696. }
  697. return p;
  698. }
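// Usage sketch (hypothetical caller, not part of this file): a never-freed
// 1 KB metadata block with the default 8-byte alignment, accounted against
// mstats.other_sys:
//
//	void *meta = runtime_persistentalloc(1024, 0, &mstats.other_sys);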
  699. static void
  700. settype(MSpan *s, void *v, uintptr typ)
  701. {
  702. uintptr size, ofs, j, t;
  703. uintptr ntypes, nbytes2, nbytes3;
  704. uintptr *data2;
  705. byte *data3;
  706. if(s->sizeclass == 0) {
  707. s->types.compression = MTypes_Single;
  708. s->types.data = typ;
  709. return;
  710. }
  711. size = s->elemsize;
  712. ofs = ((uintptr)v - (s->start<<PageShift)) / size;
  713. switch(s->types.compression) {
  714. case MTypes_Empty:
  715. ntypes = (s->npages << PageShift) / size;
  716. nbytes3 = 8*sizeof(uintptr) + 1*ntypes;
  717. data3 = runtime_mallocgc(nbytes3, 0, FlagNoProfiling|FlagNoScan|FlagNoInvokeGC);
  718. s->types.compression = MTypes_Bytes;
  719. s->types.data = (uintptr)data3;
  720. ((uintptr*)data3)[1] = typ;
  721. data3[8*sizeof(uintptr) + ofs] = 1;
  722. break;
  723. case MTypes_Words:
  724. ((uintptr*)s->types.data)[ofs] = typ;
  725. break;
  726. case MTypes_Bytes:
  727. data3 = (byte*)s->types.data;
  728. for(j=1; j<8; j++) {
  729. if(((uintptr*)data3)[j] == typ) {
  730. break;
  731. }
  732. if(((uintptr*)data3)[j] == 0) {
  733. ((uintptr*)data3)[j] = typ;
  734. break;
  735. }
  736. }
  737. if(j < 8) {
  738. data3[8*sizeof(uintptr) + ofs] = j;
  739. } else {
  740. ntypes = (s->npages << PageShift) / size;
  741. nbytes2 = ntypes * sizeof(uintptr);
  742. data2 = runtime_mallocgc(nbytes2, 0, FlagNoProfiling|FlagNoScan|FlagNoInvokeGC);
  743. s->types.compression = MTypes_Words;
  744. s->types.data = (uintptr)data2;
  745. // Move the contents of data3 to data2. Then deallocate data3.
  746. for(j=0; j<ntypes; j++) {
  747. t = data3[8*sizeof(uintptr) + j];
  748. t = ((uintptr*)data3)[t];
  749. data2[j] = t;
  750. }
  751. data2[ofs] = typ;
  752. }
  753. break;
  754. }
  755. }
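// Layout of the MTypes_Bytes encoding used above (as implemented by this
// function): s->types.data points at a byte array whose first
// 8*sizeof(uintptr) bytes form a table of type descriptors; slots 1..7 hold
// up to 7 distinct types and slot 0 stays zero, meaning "no type recorded".
// After the table there is one byte per object slot holding the index of
// that object's type. When an eighth distinct type appears, the span is
// upgraded to MTypes_Words, one uintptr per object slot.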
  756. uintptr
  757. runtime_gettype(void *v)
  758. {
  759. MSpan *s;
  760. uintptr t, ofs;
  761. byte *data;
  762. s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
  763. if(s != nil) {
  764. t = 0;
  765. switch(s->types.compression) {
  766. case MTypes_Empty:
  767. break;
  768. case MTypes_Single:
  769. t = s->types.data;
  770. break;
  771. case MTypes_Words:
  772. ofs = (uintptr)v - (s->start<<PageShift);
  773. t = ((uintptr*)s->types.data)[ofs/s->elemsize];
  774. break;
  775. case MTypes_Bytes:
  776. ofs = (uintptr)v - (s->start<<PageShift);
  777. data = (byte*)s->types.data;
  778. t = data[8*sizeof(uintptr) + ofs/s->elemsize];
  779. t = ((uintptr*)data)[t];
  780. break;
  781. default:
  782. runtime_throw("runtime_gettype: invalid compression kind");
  783. }
  784. if(0) {
  785. runtime_printf("%p -> %d,%X\n", v, (int32)s->types.compression, (int64)t);
  786. }
  787. return t;
  788. }
  789. return 0;
  790. }
  791. // Runtime stubs.
  792. void*
  793. runtime_mal(uintptr n)
  794. {
  795. return runtime_mallocgc(n, 0, 0);
  796. }
  797. func new(typ *Type) (ret *uint8) {
  798. ret = runtime_mallocgc(typ->__size, (uintptr)typ | TypeInfo_SingleObject, typ->kind&KindNoPointers ? FlagNoScan : 0);
  799. }
  800. static void*
  801. cnew(const Type *typ, intgo n, int32 objtyp)
  802. {
  803. if((objtyp&(PtrSize-1)) != objtyp)
  804. runtime_throw("runtime: invalid objtyp");
  805. if(n < 0 || (typ->__size > 0 && (uintptr)n > (MaxMem/typ->__size)))
  806. runtime_panicstring("runtime: allocation size out of range");
  807. return runtime_mallocgc(typ->__size*n, (uintptr)typ | objtyp, typ->kind&KindNoPointers ? FlagNoScan : 0);
  808. }
  809. // same as runtime_new, but callable from C
  810. void*
  811. runtime_cnew(const Type *typ)
  812. {
  813. return cnew(typ, 1, TypeInfo_SingleObject);
  814. }
  815. void*
  816. runtime_cnewarray(const Type *typ, intgo n)
  817. {
  818. return cnew(typ, n, TypeInfo_Array);
  819. }
  820. func GC() {
  821. runtime_gc(2); // force GC and do eager sweep
  822. }
  823. func SetFinalizer(obj Eface, finalizer Eface) {
  824. byte *base;
  825. uintptr size;
  826. const FuncType *ft;
  827. const Type *fint;
  828. const PtrType *ot;
  829. if(obj.__type_descriptor == nil) {
  830. runtime_printf("runtime.SetFinalizer: first argument is nil interface\n");
  831. goto throw;
  832. }
  833. if((obj.__type_descriptor->kind&kindMask) != GO_PTR) {
  834. runtime_printf("runtime.SetFinalizer: first argument is %S, not pointer\n", *obj.__type_descriptor->__reflection);
  835. goto throw;
  836. }
  837. ot = (const PtrType*)obj.type;
  838. // As an implementation detail we do not run finalizers for zero-sized objects,
  839. // because we use &runtime_zerobase for all such allocations.
  840. if(ot->__element_type != nil && ot->__element_type->__size == 0)
  841. return;
  842. // The following check is required for cases when a user passes a pointer to a composite literal,
  843. // but the compiler makes it a pointer to a global. For example:
  844. // var Foo = &Object{}
  845. // func main() {
  846. // runtime.SetFinalizer(Foo, nil)
  847. // }
  848. // See issue 7656.
  849. if((byte*)obj.__object < runtime_mheap.arena_start || runtime_mheap.arena_used <= (byte*)obj.__object)
  850. return;
  851. if(!runtime_mlookup(obj.__object, &base, &size, nil) || obj.__object != base) {
  852. // As an implementation detail we allow setting finalizers for an inner byte
  853. // of an object if it could come from the tiny allocator (see mallocgc for details).
  854. if(ot->__element_type == nil || (ot->__element_type->kind&KindNoPointers) == 0 || ot->__element_type->__size >= TinySize) {
  855. runtime_printf("runtime.SetFinalizer: pointer not at beginning of allocated block (%p)\n", obj.__object);
  856. goto throw;
  857. }
  858. }
  859. if(finalizer.__type_descriptor != nil) {
  860. runtime_createfing();
  861. if((finalizer.__type_descriptor->kind&kindMask) != GO_FUNC)
  862. goto badfunc;
  863. ft = (const FuncType*)finalizer.__type_descriptor;
  864. if(ft->__dotdotdot || ft->__in.__count != 1)
  865. goto badfunc;
  866. fint = *(Type**)ft->__in.__values;
  867. if(__go_type_descriptors_equal(fint, obj.__type_descriptor)) {
  868. // ok - same type
  869. } else if((fint->kind&kindMask) == GO_PTR && (fint->__uncommon == nil || fint->__uncommon->__name == nil || obj.type->__uncommon == nil || obj.type->__uncommon->__name == nil) && __go_type_descriptors_equal(((const PtrType*)fint)->__element_type, ((const PtrType*)obj.type)->__element_type)) {
  870. // ok - not same type, but both pointers,
  871. // one or the other is unnamed, and same element type, so assignable.
  872. } else if((fint->kind&kindMask) == GO_INTERFACE && ((const InterfaceType*)fint)->__methods.__count == 0) {
  873. // ok - satisfies empty interface
  874. } else if((fint->kind&kindMask) == GO_INTERFACE && __go_convert_interface_2(fint, obj.__type_descriptor, 1) != nil) {
  875. // ok - satisfies non-empty interface
  876. } else
  877. goto badfunc;
  878. ot = (const PtrType*)obj.__type_descriptor;
  879. if(!runtime_addfinalizer(obj.__object, *(FuncVal**)finalizer.__object, ft, ot)) {
  880. runtime_printf("runtime.SetFinalizer: finalizer already set\n");
  881. goto throw;
  882. }
  883. } else {
  884. // NOTE: asking to remove a finalizer when there currently isn't one set is OK.
  885. runtime_removefinalizer(obj.__object);
  886. }
  887. return;
  888. badfunc:
  889. runtime_printf("runtime.SetFinalizer: cannot pass %S to finalizer %S\n", *obj.__type_descriptor->__reflection, *finalizer.__type_descriptor->__reflection);
  890. throw:
  891. runtime_throw("runtime.SetFinalizer");
  892. }
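// Summary of the compatibility checks above: the finalizer must take exactly
// one parameter, which may be obj's own (pointer) type; a pointer type with
// the same element type where at least one of the two pointer types is
// unnamed; the empty interface; or a non-empty interface that obj's type
// implements.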