mgc0.c

  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Garbage collector (GC).
  5. //
  6. // GC is:
  7. // - mark&sweep
  8. // - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
  9. // - parallel (up to MaxGcproc threads)
  10. // - partially concurrent (mark is stop-the-world, while sweep is concurrent)
  11. // - non-moving/non-compacting
  12. // - full (non-partial)
  13. //
  14. // GC rate.
  15. // Next GC is after we've allocated an extra amount of memory proportional to
  16. // the amount already in use. The proportion is controlled by GOGC environment variable
  17. // (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
  18. // (this mark is tracked in next_gc variable). This keeps the GC cost in linear
  19. // proportion to the allocation cost. Adjusting GOGC just changes the linear constant
  20. // (and also the amount of extra memory used).
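//
// For example, a minimal sketch of the trigger arithmetic described above
// (variable names are illustrative, not the runtime's):
//
//	int64 heap_live = 4<<20;                           // 4M in use after the last GC
//	int32 gogc      = 100;                             // parsed from $GOGC
//	int64 next_gc   = heap_live + heap_live*gogc/100;  // 8M: collect again at this size
//
// GOGC=off disables collection entirely (see gcpercent below).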
  21. //
  22. // Concurrent sweep.
  23. // The sweep phase proceeds concurrently with normal program execution.
  24. // The heap is swept span-by-span both lazily (when a goroutine needs another span)
  25. // and concurrently in a background goroutine (this helps programs that are not CPU bound).
  26. // However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
  27. // and so next_gc calculation is tricky and happens as follows.
  28. // At the end of the stop-the-world phase next_gc is conservatively set based on total
  29. // heap size; all spans are marked as "needs sweeping".
  30. // Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
  31. // The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
  32. // closer to the target value. However, this is not enough to avoid over-allocating memory.
  33. // Consider that a goroutine wants to allocate a new span for a large object and
  34. // there are no free swept spans, but there are small-object unswept spans.
  35. // If the goroutine naively allocates a new span, it can surpass the yet-unknown
  36. // target next_gc value. To prevent such cases: (1) when a goroutine needs
  37. // to allocate a new small-object span, it sweeps small-object spans for the same
  38. // object size until it frees at least one object; (2) when a goroutine needs to
  39. // allocate a large-object span from the heap, it sweeps spans until it frees at least
  40. // that many pages into the heap. Together these two measures ensure that we don't surpass
  41. // target next_gc value by a large margin. There is an exception: if a goroutine sweeps
  42. // and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
  43. // but there can still be other one-page unswept spans which could be combined into a two-page span.
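//
// A rough sketch of that sweep-before-allocate rule (helper names here are
// illustrative, not actual runtime functions):
//
//	// small-object path: sweep spans of the same size class until at
//	// least one object is freed, then allocate from that span
//	while(!mcentral_has_free(sizeclass) && sweep_span_of_class(sizeclass))
//		;
//
//	// large-object path: sweep spans until at least npages pages have
//	// been returned to the heap, then allocate the large span
//	swept = 0;
//	while(swept < npages && (n = sweep_any_span()) > 0)
//		swept += n;
//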
  44. // It's critical to ensure that no operations proceed on unswept spans (that would corrupt
  45. // mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
  46. // so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
  47. // When a goroutine explicitly frees an object or sets a finalizer, it ensures that
  48. // the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
  49. // The finalizer goroutine is kicked off only when all spans are swept.
  50. // When the next GC starts, it sweeps all not-yet-swept spans (if any).
  51. #include <unistd.h>
  52. #include "runtime.h"
  53. #include "arch.h"
  54. #include "malloc.h"
  55. #include "mgc0.h"
  56. #include "chan.h"
  57. #include "go-type.h"
  58. // Map gccgo field names to gc field names.
  59. // Slice aka __go_open_array.
  60. #define array __values
  61. #define cap __capacity
  62. // Iface aka __go_interface
  63. #define tab __methods
  64. // Hmap aka __go_map
  65. typedef struct __go_map Hmap;
  66. // Type aka __go_type_descriptor
  67. #define string __reflection
  68. #define KindPtr GO_PTR
  69. #define KindNoPointers GO_NO_POINTERS
  70. #define kindMask GO_CODE_MASK
  71. // PtrType aka __go_ptr_type
  72. #define elem __element_type
  73. #ifdef USING_SPLIT_STACK
  74. extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
  75. void **);
  76. extern void * __splitstack_find_context (void *context[10], size_t *, void **,
  77. void **, void **);
  78. #endif
  79. enum {
  80. Debug = 0,
  81. CollectStats = 0,
  82. ConcurrentSweep = 1,
  83. WorkbufSize = 16*1024,
  84. FinBlockSize = 4*1024,
  85. handoffThreshold = 4,
  86. IntermediateBufferCapacity = 64,
  87. // Bits in type information
  88. PRECISE = 1,
  89. LOOP = 2,
  90. PC_BITS = PRECISE | LOOP,
  91. RootData = 0,
  92. RootBss = 1,
  93. RootFinalizers = 2,
  94. RootSpanTypes = 3,
  95. RootFlushCaches = 4,
  96. RootCount = 5,
  97. };
  98. #define GcpercentUnknown (-2)
  99. // Initialized from $GOGC. GOGC=off means no gc.
  100. static int32 gcpercent = GcpercentUnknown;
  101. static FuncVal* poolcleanup;
  102. void sync_runtime_registerPoolCleanup(FuncVal*)
  103. __asm__ (GOSYM_PREFIX "sync.runtime_registerPoolCleanup");
  104. void
  105. sync_runtime_registerPoolCleanup(FuncVal *f)
  106. {
  107. poolcleanup = f;
  108. }
  109. static void
  110. clearpools(void)
  111. {
  112. P *p, **pp;
  113. MCache *c;
  114. // clear sync.Pool's
  115. if(poolcleanup != nil) {
  116. __builtin_call_with_static_chain(poolcleanup->fn(),
  117. poolcleanup);
  118. }
  119. for(pp=runtime_allp; (p=*pp) != nil; pp++) {
  120. // clear tinyalloc pool
  121. c = p->mcache;
  122. if(c != nil) {
  123. c->tiny = nil;
  124. c->tinysize = 0;
  125. }
  126. // clear defer pools
  127. p->deferpool = nil;
  128. }
  129. }
  130. // Holding worldsema grants an M the right to try to stop the world.
  131. // The procedure is:
  132. //
  133. // runtime_semacquire(&runtime_worldsema);
  134. // m->gcing = 1;
  135. // runtime_stoptheworld();
  136. //
  137. // ... do stuff ...
  138. //
  139. // m->gcing = 0;
  140. // runtime_semrelease(&runtime_worldsema);
  141. // runtime_starttheworld();
  142. //
  143. uint32 runtime_worldsema = 1;
  144. typedef struct Workbuf Workbuf;
  145. struct Workbuf
  146. {
  147. #define SIZE (WorkbufSize-sizeof(LFNode)-sizeof(uintptr))
  148. LFNode node; // must be first
  149. uintptr nobj;
  150. Obj obj[SIZE/sizeof(Obj) - 1];
  151. uint8 _padding[SIZE%sizeof(Obj) + sizeof(Obj)];
  152. #undef SIZE
  153. };
  154. typedef struct Finalizer Finalizer;
  155. struct Finalizer
  156. {
  157. FuncVal *fn;
  158. void *arg;
  159. const struct __go_func_type *ft;
  160. const PtrType *ot;
  161. };
  162. typedef struct FinBlock FinBlock;
  163. struct FinBlock
  164. {
  165. FinBlock *alllink;
  166. FinBlock *next;
  167. int32 cnt;
  168. int32 cap;
  169. Finalizer fin[1];
  170. };
  171. static Lock finlock; // protects the following variables
  172. static FinBlock *finq; // list of finalizers that are to be executed
  173. static FinBlock *finc; // cache of free blocks
  174. static FinBlock *allfin; // list of all blocks
  175. bool runtime_fingwait;
  176. bool runtime_fingwake;
  177. static Lock gclock;
  178. static G* fing;
  179. static void runfinq(void*);
  180. static void bgsweep(void*);
  181. static Workbuf* getempty(Workbuf*);
  182. static Workbuf* getfull(Workbuf*);
  183. static void putempty(Workbuf*);
  184. static Workbuf* handoff(Workbuf*);
  185. static void gchelperstart(void);
  186. static void flushallmcaches(void);
  187. static void addstackroots(G *gp, Workbuf **wbufp);
  188. static struct {
  189. uint64 full; // lock-free list of full blocks
  190. uint64 empty; // lock-free list of empty blocks
  191. byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
  192. uint32 nproc;
  193. int64 tstart;
  194. volatile uint32 nwait;
  195. volatile uint32 ndone;
  196. Note alldone;
  197. ParFor *markfor;
  198. Lock;
  199. byte *chunk;
  200. uintptr nchunk;
  201. } work __attribute__((aligned(8)));
  202. enum {
  203. GC_DEFAULT_PTR = GC_NUM_INSTR,
  204. GC_CHAN,
  205. GC_NUM_INSTR2
  206. };
  207. static struct {
  208. struct {
  209. uint64 sum;
  210. uint64 cnt;
  211. } ptr;
  212. uint64 nbytes;
  213. struct {
  214. uint64 sum;
  215. uint64 cnt;
  216. uint64 notype;
  217. uint64 typelookup;
  218. } obj;
  219. uint64 rescan;
  220. uint64 rescanbytes;
  221. uint64 instr[GC_NUM_INSTR2];
  222. uint64 putempty;
  223. uint64 getfull;
  224. struct {
  225. uint64 foundbit;
  226. uint64 foundword;
  227. uint64 foundspan;
  228. } flushptrbuf;
  229. struct {
  230. uint64 foundbit;
  231. uint64 foundword;
  232. uint64 foundspan;
  233. } markonly;
  234. uint32 nbgsweep;
  235. uint32 npausesweep;
  236. } gcstats;
  237. // markonly marks an object. It returns true if the object
  238. // has been marked by this function, false otherwise.
  239. // This function doesn't append the object to any buffer.
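// A typical use appears in scanblock's GC_STRING case below: string bodies
// contain no pointers, so the referenced block only needs to be marked, never
// queued for scanning:
//
//	if(stringptr->len != 0)
//		markonly(stringptr->str);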
  240. static bool
  241. markonly(const void *obj)
  242. {
  243. byte *p;
  244. uintptr *bitp, bits, shift, x, xbits, off, j;
  245. MSpan *s;
  246. PageID k;
  247. // Words outside the arena cannot be pointers.
  248. if((const byte*)obj < runtime_mheap.arena_start || (const byte*)obj >= runtime_mheap.arena_used)
  249. return false;
  250. // obj may be a pointer to a live object.
  251. // Try to find the beginning of the object.
  252. // Round down to word boundary.
  253. obj = (const void*)((uintptr)obj & ~((uintptr)PtrSize-1));
  254. // Find bits for this word.
  255. off = (const uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
  256. bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
  257. shift = off % wordsPerBitmapWord;
  258. xbits = *bitp;
  259. bits = xbits >> shift;
  260. // Pointing at the beginning of a block?
  261. if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
  262. if(CollectStats)
  263. runtime_xadd64(&gcstats.markonly.foundbit, 1);
  264. goto found;
  265. }
  266. // Pointing just past the beginning?
  267. // Scan backward a little to find a block boundary.
  268. for(j=shift; j-->0; ) {
  269. if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
  270. shift = j;
  271. bits = xbits>>shift;
  272. if(CollectStats)
  273. runtime_xadd64(&gcstats.markonly.foundword, 1);
  274. goto found;
  275. }
  276. }
  277. // Otherwise consult span table to find beginning.
  278. // (Manually inlined copy of MHeap_LookupMaybe.)
  279. k = (uintptr)obj>>PageShift;
  280. x = k;
  281. x -= (uintptr)runtime_mheap.arena_start>>PageShift;
  282. s = runtime_mheap.spans[x];
  283. if(s == nil || k < s->start || (const byte*)obj >= s->limit || s->state != MSpanInUse)
  284. return false;
  285. p = (byte*)((uintptr)s->start<<PageShift);
  286. if(s->sizeclass == 0) {
  287. obj = p;
  288. } else {
  289. uintptr size = s->elemsize;
  290. int32 i = ((const byte*)obj - p)/size;
  291. obj = p+i*size;
  292. }
  293. // Now that we know the object header, reload bits.
  294. off = (const uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
  295. bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
  296. shift = off % wordsPerBitmapWord;
  297. xbits = *bitp;
  298. bits = xbits >> shift;
  299. if(CollectStats)
  300. runtime_xadd64(&gcstats.markonly.foundspan, 1);
  301. found:
  302. // Now we have bits, bitp, and shift correct for
  303. // obj pointing at the base of the object.
  304. // Only care about allocated and not marked.
  305. if((bits & (bitAllocated|bitMarked)) != bitAllocated)
  306. return false;
  307. if(work.nproc == 1)
  308. *bitp |= bitMarked<<shift;
  309. else {
  310. for(;;) {
  311. x = *bitp;
  312. if(x & (bitMarked<<shift))
  313. return false;
  314. if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
  315. break;
  316. }
  317. }
  318. // The object is now marked
  319. return true;
  320. }
  321. // PtrTarget is a structure used by intermediate buffers.
  322. // The intermediate buffers hold GC data before it
  323. // is moved/flushed to the work buffer (Workbuf).
  324. // The size of an intermediate buffer is very small,
  325. // such as 32 or 64 elements.
  326. typedef struct PtrTarget PtrTarget;
  327. struct PtrTarget
  328. {
  329. void *p;
  330. uintptr ti;
  331. };
  332. typedef struct Scanbuf Scanbuf;
  333. struct Scanbuf
  334. {
  335. struct {
  336. PtrTarget *begin;
  337. PtrTarget *end;
  338. PtrTarget *pos;
  339. } ptr;
  340. struct {
  341. Obj *begin;
  342. Obj *end;
  343. Obj *pos;
  344. } obj;
  345. Workbuf *wbuf;
  346. Obj *wp;
  347. uintptr nobj;
  348. };
  349. typedef struct BufferList BufferList;
  350. struct BufferList
  351. {
  352. PtrTarget ptrtarget[IntermediateBufferCapacity];
  353. Obj obj[IntermediateBufferCapacity];
  354. uint32 busy;
  355. byte pad[CacheLineSize];
  356. };
  357. static BufferList bufferList[MaxGcproc];
  358. static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
  359. // flushptrbuf moves data from the PtrTarget buffer to the work buffer.
  360. // The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned,
  361. // while the work buffer contains blocks which have been marked
  362. // and are prepared to be scanned by the garbage collector.
  363. //
  364. // _wp, _wbuf, _nobj are input/output parameters specifying the work buffer.
  365. //
  366. // A simplified drawing explaining how the todo list moves from one structure to another:
  367. //
  368. //  scanblock
  369. //  (find pointers)
  370. //    Obj ------> PtrTarget (pointer targets)
  371. //     ↑          |
  372. //     |          |
  373. //     `----------'
  374. //     flushptrbuf
  375. //  (find block start, mark and enqueue)
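//
// Callers batch pointers into sbuf.ptr and flush when it fills up, as in
// scanblock below:
//
//	*sbuf.ptr.pos++ = (PtrTarget){obj, objti};
//	if(sbuf.ptr.pos == sbuf.ptr.end)
//		flushptrbuf(&sbuf);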
  376. static void
  377. flushptrbuf(Scanbuf *sbuf)
  378. {
  379. byte *p, *arena_start, *obj;
  380. uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
  381. MSpan *s;
  382. PageID k;
  383. Obj *wp;
  384. Workbuf *wbuf;
  385. PtrTarget *ptrbuf;
  386. PtrTarget *ptrbuf_end;
  387. arena_start = runtime_mheap.arena_start;
  388. wp = sbuf->wp;
  389. wbuf = sbuf->wbuf;
  390. nobj = sbuf->nobj;
  391. ptrbuf = sbuf->ptr.begin;
  392. ptrbuf_end = sbuf->ptr.pos;
  393. n = ptrbuf_end - sbuf->ptr.begin;
  394. sbuf->ptr.pos = sbuf->ptr.begin;
  395. if(CollectStats) {
  396. runtime_xadd64(&gcstats.ptr.sum, n);
  397. runtime_xadd64(&gcstats.ptr.cnt, 1);
  398. }
  399. // If buffer is nearly full, get a new one.
  400. if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
  401. if(wbuf != nil)
  402. wbuf->nobj = nobj;
  403. wbuf = getempty(wbuf);
  404. wp = wbuf->obj;
  405. nobj = 0;
  406. if(n >= nelem(wbuf->obj))
  407. runtime_throw("ptrbuf has to be smaller than WorkBuf");
  408. }
  409. while(ptrbuf < ptrbuf_end) {
  410. obj = ptrbuf->p;
  411. ti = ptrbuf->ti;
  412. ptrbuf++;
  413. // obj belongs to interval [mheap.arena_start, mheap.arena_used).
  414. if(Debug > 1) {
  415. if(obj < runtime_mheap.arena_start || obj >= runtime_mheap.arena_used)
  416. runtime_throw("object is outside of mheap");
  417. }
  418. // obj may be a pointer to a live object.
  419. // Try to find the beginning of the object.
  420. // Round down to word boundary.
  421. if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
  422. obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
  423. ti = 0;
  424. }
  425. // Find bits for this word.
  426. off = (uintptr*)obj - (uintptr*)arena_start;
  427. bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
  428. shift = off % wordsPerBitmapWord;
  429. xbits = *bitp;
  430. bits = xbits >> shift;
  431. // Pointing at the beginning of a block?
  432. if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
  433. if(CollectStats)
  434. runtime_xadd64(&gcstats.flushptrbuf.foundbit, 1);
  435. goto found;
  436. }
  437. ti = 0;
  438. // Pointing just past the beginning?
  439. // Scan backward a little to find a block boundary.
  440. for(j=shift; j-->0; ) {
  441. if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
  442. obj = (byte*)obj - (shift-j)*PtrSize;
  443. shift = j;
  444. bits = xbits>>shift;
  445. if(CollectStats)
  446. runtime_xadd64(&gcstats.flushptrbuf.foundword, 1);
  447. goto found;
  448. }
  449. }
  450. // Otherwise consult span table to find beginning.
  451. // (Manually inlined copy of MHeap_LookupMaybe.)
  452. k = (uintptr)obj>>PageShift;
  453. x = k;
  454. x -= (uintptr)arena_start>>PageShift;
  455. s = runtime_mheap.spans[x];
  456. if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
  457. continue;
  458. p = (byte*)((uintptr)s->start<<PageShift);
  459. if(s->sizeclass == 0) {
  460. obj = p;
  461. } else {
  462. size = s->elemsize;
  463. int32 i = ((byte*)obj - p)/size;
  464. obj = p+i*size;
  465. }
  466. // Now that we know the object header, reload bits.
  467. off = (uintptr*)obj - (uintptr*)arena_start;
  468. bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
  469. shift = off % wordsPerBitmapWord;
  470. xbits = *bitp;
  471. bits = xbits >> shift;
  472. if(CollectStats)
  473. runtime_xadd64(&gcstats.flushptrbuf.foundspan, 1);
  474. found:
  475. // Now we have bits, bitp, and shift correct for
  476. // obj pointing at the base of the object.
  477. // Only care about allocated and not marked.
  478. if((bits & (bitAllocated|bitMarked)) != bitAllocated)
  479. continue;
  480. if(work.nproc == 1)
  481. *bitp |= bitMarked<<shift;
  482. else {
  483. for(;;) {
  484. x = *bitp;
  485. if(x & (bitMarked<<shift))
  486. goto continue_obj;
  487. if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
  488. break;
  489. }
  490. }
  491. // If object has no pointers, don't need to scan further.
  492. if((bits & bitScan) == 0)
  493. continue;
  494. // Ask span about size class.
  495. // (Manually inlined copy of MHeap_Lookup.)
  496. x = (uintptr)obj >> PageShift;
  497. x -= (uintptr)arena_start>>PageShift;
  498. s = runtime_mheap.spans[x];
  499. PREFETCH(obj);
  500. *wp = (Obj){obj, s->elemsize, ti};
  501. wp++;
  502. nobj++;
  503. continue_obj:;
  504. }
  505. // If another proc wants a pointer, give it some.
  506. if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
  507. wbuf->nobj = nobj;
  508. wbuf = handoff(wbuf);
  509. nobj = wbuf->nobj;
  510. wp = wbuf->obj + nobj;
  511. }
  512. sbuf->wp = wp;
  513. sbuf->wbuf = wbuf;
  514. sbuf->nobj = nobj;
  515. }
  516. static void
  517. flushobjbuf(Scanbuf *sbuf)
  518. {
  519. uintptr nobj, off;
  520. Obj *wp, obj;
  521. Workbuf *wbuf;
  522. Obj *objbuf;
  523. Obj *objbuf_end;
  524. wp = sbuf->wp;
  525. wbuf = sbuf->wbuf;
  526. nobj = sbuf->nobj;
  527. objbuf = sbuf->obj.begin;
  528. objbuf_end = sbuf->obj.pos;
  529. sbuf->obj.pos = sbuf->obj.begin;
  530. while(objbuf < objbuf_end) {
  531. obj = *objbuf++;
  532. // Align obj.b to a word boundary.
  533. off = (uintptr)obj.p & (PtrSize-1);
  534. if(off != 0) {
  535. obj.p += PtrSize - off;
  536. obj.n -= PtrSize - off;
  537. obj.ti = 0;
  538. }
  539. if(obj.p == nil || obj.n == 0)
  540. continue;
  541. // If buffer is full, get a new one.
  542. if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
  543. if(wbuf != nil)
  544. wbuf->nobj = nobj;
  545. wbuf = getempty(wbuf);
  546. wp = wbuf->obj;
  547. nobj = 0;
  548. }
  549. *wp = obj;
  550. wp++;
  551. nobj++;
  552. }
  553. // If another proc wants a pointer, give it some.
  554. if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
  555. wbuf->nobj = nobj;
  556. wbuf = handoff(wbuf);
  557. nobj = wbuf->nobj;
  558. wp = wbuf->obj + nobj;
  559. }
  560. sbuf->wp = wp;
  561. sbuf->wbuf = wbuf;
  562. sbuf->nobj = nobj;
  563. }
  564. // Program that scans the whole block and treats every block element as a potential pointer
  565. static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
  566. // Hchan program
  567. static uintptr chanProg[2] = {0, GC_CHAN};
  568. // Local variables of a program fragment or loop
  569. typedef struct Frame Frame;
  570. struct Frame {
  571. uintptr count, elemsize, b;
  572. const uintptr *loop_or_ret;
  573. };
  574. // Sanity check for the derived type info objti.
  575. static void
  576. checkptr(void *obj, uintptr objti)
  577. {
  578. uintptr *pc1, type, tisize, i, j, x;
  579. const uintptr *pc2;
  580. byte *objstart;
  581. Type *t;
  582. MSpan *s;
  583. if(!Debug)
  584. runtime_throw("checkptr is debug only");
  585. if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
  586. return;
  587. type = runtime_gettype(obj);
  588. t = (Type*)(type & ~(uintptr)(PtrSize-1));
  589. if(t == nil)
  590. return;
  591. x = (uintptr)obj >> PageShift;
  592. x -= (uintptr)(runtime_mheap.arena_start)>>PageShift;
  593. s = runtime_mheap.spans[x];
  594. objstart = (byte*)((uintptr)s->start<<PageShift);
  595. if(s->sizeclass != 0) {
  596. i = ((byte*)obj - objstart)/s->elemsize;
  597. objstart += i*s->elemsize;
  598. }
  599. tisize = *(uintptr*)objti;
  600. // Sanity check for object size: it should fit into the memory block.
  601. if((byte*)obj + tisize > objstart + s->elemsize) {
  602. runtime_printf("object of type '%S' at %p/%p does not fit in block %p/%p\n",
  603. *t->string, obj, tisize, objstart, s->elemsize);
  604. runtime_throw("invalid gc type info");
  605. }
  606. if(obj != objstart)
  607. return;
  608. // If obj points to the beginning of the memory block,
  609. // check type info as well.
  610. if(t->string == nil ||
  611. // Gob allocates unsafe pointers for indirection.
  612. (runtime_strcmp((const char *)t->string->str, (const char*)"unsafe.Pointer") &&
  613. // Runtime and gc think differently about closures.
  614. runtime_strstr((const char *)t->string->str, (const char*)"struct { F uintptr") != (const char *)t->string->str)) {
  615. pc1 = (uintptr*)objti;
  616. pc2 = (const uintptr*)t->__gc;
  617. // A simple best-effort check until first GC_END.
  618. for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
  619. if(pc1[j] != pc2[j]) {
  620. runtime_printf("invalid gc type info for '%s', type info %p [%d]=%p, block info %p [%d]=%p\n",
  621. t->string ? (const int8*)t->string->str : (const int8*)"?", pc1, (int32)j, pc1[j], pc2, (int32)j, pc2[j]);
  622. runtime_throw("invalid gc type info");
  623. }
  624. }
  625. }
  626. }
  627. // scanblock scans a block of n bytes starting at pointer b for references
  628. // to other objects, scanning any it finds recursively until there are no
  629. // unscanned objects left. Instead of using an explicit recursion, it keeps
  630. // a work list in the Workbuf* structures and loops in the main function
  631. // body. Keeping an explicit work list is easier on the stack allocator and
  632. // more efficient.
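// In outline, the control flow below looks roughly like this (pseudocode,
// simplified from the actual loop):
//
//	for(;;) {
//		// interpret the GC program for the block [b, b+n), pushing any
//		// discovered pointers through sbuf into work buffers
//		...
//	next_block:
//		if(sbuf.nobj == 0) {
//			flush sbuf; if still empty, refill from work.full or return;
//		}
//		pop (b, n, ti) from the current work buffer;
//	}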
  633. static void
  634. scanblock(Workbuf *wbuf, bool keepworking)
  635. {
  636. byte *b, *arena_start, *arena_used;
  637. uintptr n, i, end_b, elemsize, size, ti, objti, count, type, nobj;
  638. uintptr precise_type, nominal_size;
  639. const uintptr *pc, *chan_ret;
  640. uintptr chancap;
  641. void *obj;
  642. const Type *t, *et;
  643. Slice *sliceptr;
  644. String *stringptr;
  645. Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
  646. BufferList *scanbuffers;
  647. Scanbuf sbuf;
  648. Eface *eface;
  649. Iface *iface;
  650. Hchan *chan;
  651. const ChanType *chantype;
  652. Obj *wp;
  653. if(sizeof(Workbuf) % WorkbufSize != 0)
  654. runtime_throw("scanblock: size of Workbuf is suboptimal");
  655. // Memory arena parameters.
  656. arena_start = runtime_mheap.arena_start;
  657. arena_used = runtime_mheap.arena_used;
  658. stack_ptr = stack+nelem(stack)-1;
  659. precise_type = false;
  660. nominal_size = 0;
  661. if(wbuf) {
  662. nobj = wbuf->nobj;
  663. wp = &wbuf->obj[nobj];
  664. } else {
  665. nobj = 0;
  666. wp = nil;
  667. }
  668. // Initialize sbuf
  669. scanbuffers = &bufferList[runtime_m()->helpgc];
  670. sbuf.ptr.begin = sbuf.ptr.pos = &scanbuffers->ptrtarget[0];
  671. sbuf.ptr.end = sbuf.ptr.begin + nelem(scanbuffers->ptrtarget);
  672. sbuf.obj.begin = sbuf.obj.pos = &scanbuffers->obj[0];
  673. sbuf.obj.end = sbuf.obj.begin + nelem(scanbuffers->obj);
  674. sbuf.wbuf = wbuf;
  675. sbuf.wp = wp;
  676. sbuf.nobj = nobj;
  677. // (Silence the compiler)
  678. chan = nil;
  679. chantype = nil;
  680. chan_ret = nil;
  681. goto next_block;
  682. for(;;) {
  683. // Each iteration scans the block b of length n, queueing pointers in
  684. // the work buffer.
  685. if(CollectStats) {
  686. runtime_xadd64(&gcstats.nbytes, n);
  687. runtime_xadd64(&gcstats.obj.sum, sbuf.nobj);
  688. runtime_xadd64(&gcstats.obj.cnt, 1);
  689. }
  690. if(ti != 0) {
  691. if(Debug > 1) {
  692. runtime_printf("scanblock %p %D ti %p\n", b, (int64)n, ti);
  693. }
  694. pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
  695. precise_type = (ti & PRECISE);
  696. stack_top.elemsize = pc[0];
  697. if(!precise_type)
  698. nominal_size = pc[0];
  699. if(ti & LOOP) {
  700. stack_top.count = 0; // 0 means an infinite number of iterations
  701. stack_top.loop_or_ret = pc+1;
  702. } else {
  703. stack_top.count = 1;
  704. }
  705. if(Debug) {
  706. // Simple sanity check for provided type info ti:
  707. // The declared size of the object must not be larger than the actual size
  708. // (it can be smaller due to interior pointers).
  709. // It's difficult to make a comprehensive check due to interior pointers,
  710. // reflection, gob, etc.
  711. if(pc[0] > n) {
  712. runtime_printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n);
  713. runtime_throw("invalid gc type info");
  714. }
  715. }
  716. } else if(UseSpanType) {
  717. if(CollectStats)
  718. runtime_xadd64(&gcstats.obj.notype, 1);
  719. type = runtime_gettype(b);
  720. if(type != 0) {
  721. if(CollectStats)
  722. runtime_xadd64(&gcstats.obj.typelookup, 1);
  723. t = (Type*)(type & ~(uintptr)(PtrSize-1));
  724. switch(type & (PtrSize-1)) {
  725. case TypeInfo_SingleObject:
  726. pc = (const uintptr*)t->__gc;
  727. precise_type = true; // type information about 'b' is precise
  728. stack_top.count = 1;
  729. stack_top.elemsize = pc[0];
  730. break;
  731. case TypeInfo_Array:
  732. pc = (const uintptr*)t->__gc;
  733. if(pc[0] == 0)
  734. goto next_block;
  735. precise_type = true; // type information about 'b' is precise
  736. stack_top.count = 0; // 0 means an infinite number of iterations
  737. stack_top.elemsize = pc[0];
  738. stack_top.loop_or_ret = pc+1;
  739. break;
  740. case TypeInfo_Chan:
  741. chan = (Hchan*)b;
  742. chantype = (const ChanType*)t;
  743. chan_ret = nil;
  744. pc = chanProg;
  745. break;
  746. default:
  747. if(Debug > 1)
  748. runtime_printf("scanblock %p %D type %p %S\n", b, (int64)n, type, *t->string);
  749. runtime_throw("scanblock: invalid type");
  750. return;
  751. }
  752. if(Debug > 1)
  753. runtime_printf("scanblock %p %D type %p %S pc=%p\n", b, (int64)n, type, *t->string, pc);
  754. } else {
  755. pc = defaultProg;
  756. if(Debug > 1)
  757. runtime_printf("scanblock %p %D unknown type\n", b, (int64)n);
  758. }
  759. } else {
  760. pc = defaultProg;
  761. if(Debug > 1)
  762. runtime_printf("scanblock %p %D no span types\n", b, (int64)n);
  763. }
  764. if(IgnorePreciseGC)
  765. pc = defaultProg;
  766. pc++;
  767. stack_top.b = (uintptr)b;
  768. end_b = (uintptr)b + n - PtrSize;
  769. for(;;) {
  770. if(CollectStats)
  771. runtime_xadd64(&gcstats.instr[pc[0]], 1);
  772. obj = nil;
  773. objti = 0;
  774. switch(pc[0]) {
  775. case GC_PTR:
  776. obj = *(void**)(stack_top.b + pc[1]);
  777. objti = pc[2];
  778. if(Debug > 2)
  779. runtime_printf("gc_ptr @%p: %p ti=%p\n", stack_top.b+pc[1], obj, objti);
  780. pc += 3;
  781. if(Debug)
  782. checkptr(obj, objti);
  783. break;
  784. case GC_SLICE:
  785. sliceptr = (Slice*)(stack_top.b + pc[1]);
  786. if(Debug > 2)
  787. runtime_printf("gc_slice @%p: %p/%D/%D\n", sliceptr, sliceptr->array, (int64)sliceptr->__count, (int64)sliceptr->cap);
  788. if(sliceptr->cap != 0) {
  789. obj = sliceptr->array;
  790. // Can't use slice element type for scanning,
  791. // because if it points to an array embedded
  792. // in the beginning of a struct,
  793. // we will scan the whole struct as the slice.
  794. // So just obtain type info from heap.
  795. }
  796. pc += 3;
  797. break;
  798. case GC_APTR:
  799. obj = *(void**)(stack_top.b + pc[1]);
  800. if(Debug > 2)
  801. runtime_printf("gc_aptr @%p: %p\n", stack_top.b+pc[1], obj);
  802. pc += 2;
  803. break;
  804. case GC_STRING:
  805. stringptr = (String*)(stack_top.b + pc[1]);
  806. if(Debug > 2)
  807. runtime_printf("gc_string @%p: %p/%D\n", stack_top.b+pc[1], stringptr->str, (int64)stringptr->len);
  808. if(stringptr->len != 0)
  809. markonly(stringptr->str);
  810. pc += 2;
  811. continue;
  812. case GC_EFACE:
  813. eface = (Eface*)(stack_top.b + pc[1]);
  814. pc += 2;
  815. if(Debug > 2)
  816. runtime_printf("gc_eface @%p: %p %p\n", stack_top.b+pc[1], eface->__type_descriptor, eface->__object);
  817. if(eface->__type_descriptor == nil)
  818. continue;
  819. // eface->type
  820. t = eface->__type_descriptor;
  821. if((const byte*)t >= arena_start && (const byte*)t < arena_used) {
  822. union { const Type *tc; Type *tr; } u;
  823. u.tc = t;
  824. *sbuf.ptr.pos++ = (PtrTarget){u.tr, 0};
  825. if(sbuf.ptr.pos == sbuf.ptr.end)
  826. flushptrbuf(&sbuf);
  827. }
  828. // eface->__object
  829. if((byte*)eface->__object >= arena_start && (byte*)eface->__object < arena_used) {
  830. if(__go_is_pointer_type(t)) {
  831. if((t->__code & KindNoPointers))
  832. continue;
  833. obj = eface->__object;
  834. if((t->__code & kindMask) == KindPtr) {
  835. // Only use type information if it is a pointer-containing type.
  836. // This matches the GC programs written by cmd/gc/reflect.c's
  837. // dgcsym1 in case TPTR32/case TPTR64. See rationale there.
  838. et = ((const PtrType*)t)->elem;
  839. if(!(et->__code & KindNoPointers))
  840. objti = (uintptr)((const PtrType*)t)->elem->__gc;
  841. }
  842. } else {
  843. obj = eface->__object;
  844. objti = (uintptr)t->__gc;
  845. }
  846. }
  847. break;
  848. case GC_IFACE:
  849. iface = (Iface*)(stack_top.b + pc[1]);
  850. pc += 2;
  851. if(Debug > 2)
  852. runtime_printf("gc_iface @%p: %p/%p %p\n", stack_top.b+pc[1], iface->__methods[0], nil, iface->__object);
  853. if(iface->tab == nil)
  854. continue;
  855. // iface->tab
  856. if((byte*)iface->tab >= arena_start && (byte*)iface->tab < arena_used) {
  857. *sbuf.ptr.pos++ = (PtrTarget){iface->tab, 0};
  858. if(sbuf.ptr.pos == sbuf.ptr.end)
  859. flushptrbuf(&sbuf);
  860. }
  861. // iface->data
  862. if((byte*)iface->__object >= arena_start && (byte*)iface->__object < arena_used) {
  863. t = (const Type*)iface->tab[0];
  864. if(__go_is_pointer_type(t)) {
  865. if((t->__code & KindNoPointers))
  866. continue;
  867. obj = iface->__object;
  868. if((t->__code & kindMask) == KindPtr) {
  869. // Only use type information if it is a pointer-containing type.
  870. // This matches the GC programs written by cmd/gc/reflect.c's
  871. // dgcsym1 in case TPTR32/case TPTR64. See rationale there.
  872. et = ((const PtrType*)t)->elem;
  873. if(!(et->__code & KindNoPointers))
  874. objti = (uintptr)((const PtrType*)t)->elem->__gc;
  875. }
  876. } else {
  877. obj = iface->__object;
  878. objti = (uintptr)t->__gc;
  879. }
  880. }
  881. break;
  882. case GC_DEFAULT_PTR:
  883. while(stack_top.b <= end_b) {
  884. obj = *(byte**)stack_top.b;
  885. if(Debug > 2)
  886. runtime_printf("gc_default_ptr @%p: %p\n", stack_top.b, obj);
  887. stack_top.b += PtrSize;
  888. if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
  889. *sbuf.ptr.pos++ = (PtrTarget){obj, 0};
  890. if(sbuf.ptr.pos == sbuf.ptr.end)
  891. flushptrbuf(&sbuf);
  892. }
  893. }
  894. goto next_block;
  895. case GC_END:
  896. if(--stack_top.count != 0) {
  897. // Next iteration of a loop if possible.
  898. stack_top.b += stack_top.elemsize;
  899. if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) {
  900. pc = stack_top.loop_or_ret;
  901. continue;
  902. }
  903. i = stack_top.b;
  904. } else {
  905. // Stack pop if possible.
  906. if(stack_ptr+1 < stack+nelem(stack)) {
  907. pc = stack_top.loop_or_ret;
  908. stack_top = *(++stack_ptr);
  909. continue;
  910. }
  911. i = (uintptr)b + nominal_size;
  912. }
  913. if(!precise_type) {
  914. // Quickly scan [b+i,b+n) for possible pointers.
  915. for(; i<=end_b; i+=PtrSize) {
  916. if(*(byte**)i != nil) {
  917. // Found a value that may be a pointer.
  918. // Do a rescan of the entire block.
  919. enqueue((Obj){b, n, 0}, &sbuf.wbuf, &sbuf.wp, &sbuf.nobj);
  920. if(CollectStats) {
  921. runtime_xadd64(&gcstats.rescan, 1);
  922. runtime_xadd64(&gcstats.rescanbytes, n);
  923. }
  924. break;
  925. }
  926. }
  927. }
  928. goto next_block;
  929. case GC_ARRAY_START:
  930. i = stack_top.b + pc[1];
  931. count = pc[2];
  932. elemsize = pc[3];
  933. pc += 4;
  934. // Stack push.
  935. *stack_ptr-- = stack_top;
  936. stack_top = (Frame){count, elemsize, i, pc};
  937. continue;
  938. case GC_ARRAY_NEXT:
  939. if(--stack_top.count != 0) {
  940. stack_top.b += stack_top.elemsize;
  941. pc = stack_top.loop_or_ret;
  942. } else {
  943. // Stack pop.
  944. stack_top = *(++stack_ptr);
  945. pc += 1;
  946. }
  947. continue;
  948. case GC_CALL:
  949. // Stack push.
  950. *stack_ptr-- = stack_top;
  951. stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
  952. pc = (const uintptr*)((const byte*)pc + *(const int32*)(pc+2)); // target of the CALL instruction
  953. continue;
  954. case GC_REGION:
  955. obj = (void*)(stack_top.b + pc[1]);
  956. size = pc[2];
  957. objti = pc[3];
  958. pc += 4;
  959. if(Debug > 2)
  960. runtime_printf("gc_region @%p: %D %p\n", stack_top.b+pc[1], (int64)size, objti);
  961. *sbuf.obj.pos++ = (Obj){obj, size, objti};
  962. if(sbuf.obj.pos == sbuf.obj.end)
  963. flushobjbuf(&sbuf);
  964. continue;
  965. case GC_CHAN_PTR:
  966. chan = *(Hchan**)(stack_top.b + pc[1]);
  967. if(Debug > 2 && chan != nil)
  968. runtime_printf("gc_chan_ptr @%p: %p/%D/%D %p\n", stack_top.b+pc[1], chan, (int64)chan->qcount, (int64)chan->dataqsiz, pc[2]);
  969. if(chan == nil) {
  970. pc += 3;
  971. continue;
  972. }
  973. if(markonly(chan)) {
  974. chantype = (ChanType*)pc[2];
  975. if(!(chantype->elem->__code & KindNoPointers)) {
  976. // Start chanProg.
  977. chan_ret = pc+3;
  978. pc = chanProg+1;
  979. continue;
  980. }
  981. }
  982. pc += 3;
  983. continue;
  984. case GC_CHAN:
  985. // There are no heap pointers in struct Hchan,
  986. // so we can ignore the leading sizeof(Hchan) bytes.
  987. if(!(chantype->elem->__code & KindNoPointers)) {
  988. // Channel's buffer follows Hchan immediately in memory.
  989. // Size of buffer (cap(c)) is second int in the chan struct.
  990. chancap = ((uintgo*)chan)[1];
  991. if(chancap > 0) {
  992. // TODO(atom): split into two chunks so that only the
  993. // in-use part of the circular buffer is scanned.
  994. // (Channel routines zero the unused part, so the current
  995. // code does not lead to leaks, it's just a little inefficient.)
  996. *sbuf.obj.pos++ = (Obj){(byte*)chan+runtime_Hchansize, chancap*chantype->elem->__size,
  997. (uintptr)chantype->elem->__gc | PRECISE | LOOP};
  998. if(sbuf.obj.pos == sbuf.obj.end)
  999. flushobjbuf(&sbuf);
  1000. }
  1001. }
  1002. if(chan_ret == nil)
  1003. goto next_block;
  1004. pc = chan_ret;
  1005. continue;
  1006. default:
  1007. runtime_printf("runtime: invalid GC instruction %p at %p\n", pc[0], pc);
  1008. runtime_throw("scanblock: invalid GC instruction");
  1009. return;
  1010. }
  1011. if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
  1012. *sbuf.ptr.pos++ = (PtrTarget){obj, objti};
  1013. if(sbuf.ptr.pos == sbuf.ptr.end)
  1014. flushptrbuf(&sbuf);
  1015. }
  1016. }
  1017. next_block:
  1018. // Done scanning [b, b+n). Prepare for the next iteration of
  1019. // the loop by setting b, n, ti to the parameters for the next block.
  1020. if(sbuf.nobj == 0) {
  1021. flushptrbuf(&sbuf);
  1022. flushobjbuf(&sbuf);
  1023. if(sbuf.nobj == 0) {
  1024. if(!keepworking) {
  1025. if(sbuf.wbuf)
  1026. putempty(sbuf.wbuf);
  1027. return;
  1028. }
  1029. // Emptied our buffer: refill.
  1030. sbuf.wbuf = getfull(sbuf.wbuf);
  1031. if(sbuf.wbuf == nil)
  1032. return;
  1033. sbuf.nobj = sbuf.wbuf->nobj;
  1034. sbuf.wp = sbuf.wbuf->obj + sbuf.wbuf->nobj;
  1035. }
  1036. }
  1037. // Fetch b from the work buffer.
  1038. --sbuf.wp;
  1039. b = sbuf.wp->p;
  1040. n = sbuf.wp->n;
  1041. ti = sbuf.wp->ti;
  1042. sbuf.nobj--;
  1043. }
  1044. }
  1045. static struct root_list* roots;
  1046. void
  1047. __go_register_gc_roots (struct root_list* r)
  1048. {
  1049. // FIXME: This needs locking if multiple goroutines can call
  1050. // dlopen simultaneously.
  1051. r->next = roots;
  1052. roots = r;
  1053. }
  1054. // Append obj to the work buffer.
  1055. // _wbuf, _wp, _nobj are input/output parameters specifying the work buffer.
  1056. static void
  1057. enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
  1058. {
  1059. uintptr nobj, off;
  1060. Obj *wp;
  1061. Workbuf *wbuf;
  1062. if(Debug > 1)
  1063. runtime_printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);
  1064. // Align obj.b to a word boundary.
  1065. off = (uintptr)obj.p & (PtrSize-1);
  1066. if(off != 0) {
  1067. obj.p += PtrSize - off;
  1068. obj.n -= PtrSize - off;
  1069. obj.ti = 0;
  1070. }
  1071. if(obj.p == nil || obj.n == 0)
  1072. return;
  1073. // Load work buffer state
  1074. wp = *_wp;
  1075. wbuf = *_wbuf;
  1076. nobj = *_nobj;
  1077. // If another proc wants a pointer, give it some.
  1078. if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
  1079. wbuf->nobj = nobj;
  1080. wbuf = handoff(wbuf);
  1081. nobj = wbuf->nobj;
  1082. wp = wbuf->obj + nobj;
  1083. }
  1084. // If buffer is full, get a new one.
  1085. if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
  1086. if(wbuf != nil)
  1087. wbuf->nobj = nobj;
  1088. wbuf = getempty(wbuf);
  1089. wp = wbuf->obj;
  1090. nobj = 0;
  1091. }
  1092. *wp = obj;
  1093. wp++;
  1094. nobj++;
  1095. // Save work buffer state
  1096. *_wp = wp;
  1097. *_wbuf = wbuf;
  1098. *_nobj = nobj;
  1099. }
  1100. static void
  1101. enqueue1(Workbuf **wbufp, Obj obj)
  1102. {
  1103. Workbuf *wbuf;
  1104. wbuf = *wbufp;
  1105. if(wbuf->nobj >= nelem(wbuf->obj))
  1106. *wbufp = wbuf = getempty(wbuf);
  1107. wbuf->obj[wbuf->nobj++] = obj;
  1108. }
  1109. static void
  1110. markroot(ParFor *desc, uint32 i)
  1111. {
  1112. Workbuf *wbuf;
  1113. FinBlock *fb;
  1114. MHeap *h;
  1115. MSpan **allspans, *s;
  1116. uint32 spanidx, sg;
  1117. G *gp;
  1118. void *p;
  1119. USED(&desc);
  1120. wbuf = getempty(nil);
  1121. // Note: if you add a case here, please also update heapdump.c:dumproots.
  1122. switch(i) {
  1123. case RootData:
  1124. // For gccgo this is both data and bss.
  1125. {
  1126. struct root_list *pl;
  1127. for(pl = roots; pl != nil; pl = pl->next) {
  1128. struct root *pr = &pl->roots[0];
  1129. while(1) {
  1130. void *decl = pr->decl;
  1131. if(decl == nil)
  1132. break;
  1133. enqueue1(&wbuf, (Obj){decl, pr->size, 0});
  1134. pr++;
  1135. }
  1136. }
  1137. }
  1138. break;
  1139. case RootBss:
  1140. // For gccgo we use this for all the other global roots.
  1141. enqueue1(&wbuf, (Obj){(byte*)&runtime_m0, sizeof runtime_m0, 0});
  1142. enqueue1(&wbuf, (Obj){(byte*)&runtime_g0, sizeof runtime_g0, 0});
  1143. enqueue1(&wbuf, (Obj){(byte*)&runtime_allg, sizeof runtime_allg, 0});
  1144. enqueue1(&wbuf, (Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0});
  1145. enqueue1(&wbuf, (Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0});
  1146. enqueue1(&wbuf, (Obj){(byte*)&work, sizeof work, 0});
  1147. runtime_proc_scan(&wbuf, enqueue1);
  1148. runtime_MProf_Mark(&wbuf, enqueue1);
  1149. runtime_time_scan(&wbuf, enqueue1);
  1150. runtime_netpoll_scan(&wbuf, enqueue1);
  1151. break;
  1152. case RootFinalizers:
  1153. for(fb=allfin; fb; fb=fb->alllink)
  1154. enqueue1(&wbuf, (Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
  1155. break;
  1156. case RootSpanTypes:
  1157. // mark span types and MSpan.specials (to walk spans only once)
  1158. h = &runtime_mheap;
  1159. sg = h->sweepgen;
  1160. allspans = h->allspans;
  1161. for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) {
  1162. Special *sp;
  1163. SpecialFinalizer *spf;
  1164. s = allspans[spanidx];
  1165. if(s->sweepgen != sg) {
  1166. runtime_printf("sweep %d %d\n", s->sweepgen, sg);
  1167. runtime_throw("gc: unswept span");
  1168. }
  1169. if(s->state != MSpanInUse)
  1170. continue;
  1171. // The garbage collector ignores type pointers stored in MSpan.types:
  1172. // - Compiler-generated types are stored outside of heap.
  1173. // - The reflect package has runtime-generated types cached in its data structures.
  1174. // The garbage collector relies on finding the references via that cache.
  1175. if(s->types.compression == MTypes_Words || s->types.compression == MTypes_Bytes)
  1176. markonly((byte*)s->types.data);
  1177. for(sp = s->specials; sp != nil; sp = sp->next) {
  1178. if(sp->kind != KindSpecialFinalizer)
  1179. continue;
  1180. // don't mark finalized object, but scan it so we
  1181. // retain everything it points to.
  1182. spf = (SpecialFinalizer*)sp;
  1183. // A finalizer can be set for an inner byte of an object, find object beginning.
  1184. p = (void*)((s->start << PageShift) + spf->offset/s->elemsize*s->elemsize);
  1185. enqueue1(&wbuf, (Obj){p, s->elemsize, 0});
  1186. enqueue1(&wbuf, (Obj){(void*)&spf->fn, PtrSize, 0});
  1187. enqueue1(&wbuf, (Obj){(void*)&spf->ft, PtrSize, 0});
  1188. enqueue1(&wbuf, (Obj){(void*)&spf->ot, PtrSize, 0});
  1189. }
  1190. }
  1191. break;
  1192. case RootFlushCaches:
  1193. flushallmcaches();
  1194. break;
  1195. default:
  1196. // the rest is scanning goroutine stacks
  1197. if(i - RootCount >= runtime_allglen)
  1198. runtime_throw("markroot: bad index");
  1199. gp = runtime_allg[i - RootCount];
  1200. // remember when we first observed the G blocked;
  1201. // needed only for traceback output
  1202. if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince == 0)
  1203. gp->waitsince = work.tstart;
  1204. addstackroots(gp, &wbuf);
  1205. break;
  1206. }
  1207. if(wbuf)
  1208. scanblock(wbuf, false);
  1209. }
  1210. // Get an empty work buffer off the work.empty list,
  1211. // allocating new buffers as needed.
  1212. static Workbuf*
  1213. getempty(Workbuf *b)
  1214. {
  1215. if(b != nil)
  1216. runtime_lfstackpush(&work.full, &b->node);
  1217. b = (Workbuf*)runtime_lfstackpop(&work.empty);
  1218. if(b == nil) {
  1219. // Need to allocate.
  1220. runtime_lock(&work);
  1221. if(work.nchunk < sizeof *b) {
  1222. work.nchunk = 1<<20;
  1223. work.chunk = runtime_SysAlloc(work.nchunk, &mstats.gc_sys);
  1224. if(work.chunk == nil)
  1225. runtime_throw("runtime: cannot allocate memory");
  1226. }
  1227. b = (Workbuf*)work.chunk;
  1228. work.chunk += sizeof *b;
  1229. work.nchunk -= sizeof *b;
  1230. runtime_unlock(&work);
  1231. }
  1232. b->nobj = 0;
  1233. return b;
  1234. }
  1235. static void
  1236. putempty(Workbuf *b)
  1237. {
  1238. if(CollectStats)
  1239. runtime_xadd64(&gcstats.putempty, 1);
  1240. runtime_lfstackpush(&work.empty, &b->node);
  1241. }
  1242. // Get a full work buffer off the work.full list, or return nil.
  1243. static Workbuf*
  1244. getfull(Workbuf *b)
  1245. {
  1246. M *m;
  1247. int32 i;
  1248. if(CollectStats)
  1249. runtime_xadd64(&gcstats.getfull, 1);
  1250. if(b != nil)
  1251. runtime_lfstackpush(&work.empty, &b->node);
  1252. b = (Workbuf*)runtime_lfstackpop(&work.full);
  1253. if(b != nil || work.nproc == 1)
  1254. return b;
  1255. m = runtime_m();
  1256. runtime_xadd(&work.nwait, +1);
  1257. for(i=0;; i++) {
  1258. if(work.full != 0) {
  1259. runtime_xadd(&work.nwait, -1);
  1260. b = (Workbuf*)runtime_lfstackpop(&work.full);
  1261. if(b != nil)
  1262. return b;
  1263. runtime_xadd(&work.nwait, +1);
  1264. }
  1265. if(work.nwait == work.nproc)
  1266. return nil;
  1267. if(i < 10) {
  1268. m->gcstats.nprocyield++;
  1269. runtime_procyield(20);
  1270. } else if(i < 20) {
  1271. m->gcstats.nosyield++;
  1272. runtime_osyield();
  1273. } else {
  1274. m->gcstats.nsleep++;
  1275. runtime_usleep(100);
  1276. }
  1277. }
  1278. }
  1279. static Workbuf*
  1280. handoff(Workbuf *b)
  1281. {
  1282. M *m;
  1283. int32 n;
  1284. Workbuf *b1;
  1285. m = runtime_m();
  1286. // Make new buffer with half of b's pointers.
  1287. b1 = getempty(nil);
  1288. n = b->nobj/2;
  1289. b->nobj -= n;
  1290. b1->nobj = n;
  1291. runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
  1292. m->gcstats.nhandoff++;
  1293. m->gcstats.nhandoffcnt += n;
  1294. // Put b on full list - let first half of b get stolen.
  1295. runtime_lfstackpush(&work.full, &b->node);
  1296. return b1;
  1297. }
static void
addstackroots(G *gp, Workbuf **wbufp)
{
	switch(gp->status){
	default:
		runtime_printf("unexpected G.status %d (goroutine %p %D)\n", gp->status, gp, gp->goid);
		runtime_throw("mark - bad status");
	case Gdead:
		return;
	case Grunning:
		runtime_throw("mark - world not stopped");
	case Grunnable:
	case Gsyscall:
	case Gwaiting:
		break;
	}

#ifdef USING_SPLIT_STACK
	M *mp;
	void* sp;
	size_t spsize;
	void* next_segment;
	void* next_sp;
	void* initial_sp;

	if(gp == runtime_g()) {
		// Scanning our own stack.
		sp = __splitstack_find(nil, nil, &spsize, &next_segment,
				       &next_sp, &initial_sp);
	} else if((mp = gp->m) != nil && mp->helpgc) {
		// gchelper's stack is in active use and has no interesting pointers.
		return;
	} else {
		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).
		// The exception is that if the goroutine is about to enter or might
		// have just exited a system call, it may be executing code such
		// as schedlock and may have needed to start a new stack segment.
		// Use the stack segment and stack pointer at the time of
		// the system call instead, since that won't change underfoot.
		if(gp->gcstack != nil) {
			sp = gp->gcstack;
			spsize = gp->gcstack_size;
			next_segment = gp->gcnext_segment;
			next_sp = gp->gcnext_sp;
			initial_sp = gp->gcinitial_sp;
		} else {
			sp = __splitstack_find_context(&gp->stack_context[0],
						       &spsize, &next_segment,
						       &next_sp, &initial_sp);
		}
	}
	if(sp != nil) {
		enqueue1(wbufp, (Obj){sp, spsize, 0});
		while((sp = __splitstack_find(next_segment, next_sp,
					      &spsize, &next_segment,
					      &next_sp, &initial_sp)) != nil)
			enqueue1(wbufp, (Obj){sp, spsize, 0});
	}
#else
	M *mp;
	byte* bottom;
	byte* top;

	if(gp == runtime_g()) {
		// Scanning our own stack.
		bottom = (byte*)&gp;
	} else if((mp = gp->m) != nil && mp->helpgc) {
		// gchelper's stack is in active use and has no interesting pointers.
		return;
	} else {
		// Scanning another goroutine's stack.
		// The goroutine is usually asleep (the world is stopped).
		bottom = (byte*)gp->gcnext_sp;
		if(bottom == nil)
			return;
	}
	top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
	if(top > bottom)
		enqueue1(wbufp, (Obj){bottom, top - bottom, 0});
	else
		enqueue1(wbufp, (Obj){top, bottom - top, 0});
#endif
}

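// Queue the finalizer fn to be run for object p, allocating a new FinBlock
// when the current one is full, and note that the finalizer goroutine
// should be woken.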
void
runtime_queuefinalizer(void *p, FuncVal *fn, const FuncType *ft, const PtrType *ot)
{
	FinBlock *block;
	Finalizer *f;

	runtime_lock(&finlock);
	if(finq == nil || finq->cnt == finq->cap) {
		if(finc == nil) {
			finc = runtime_persistentalloc(FinBlockSize, 0, &mstats.gc_sys);
			finc->cap = (FinBlockSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
			finc->alllink = allfin;
			allfin = finc;
		}
		block = finc;
		finc = block->next;
		block->next = finq;
		finq = block;
	}
	f = &finq->fin[finq->cnt];
	finq->cnt++;
	f->fn = fn;
	f->ft = ft;
	f->ot = ot;
	f->arg = p;
	runtime_fingwake = true;
	runtime_unlock(&finlock);
}

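// Invoke callback for every queued finalizer, across all FinBlocks.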
void
runtime_iterate_finq(void (*callback)(FuncVal*, void*, const FuncType*, const PtrType*))
{
	FinBlock *fb;
	Finalizer *f;
	int32 i;

	for(fb = allfin; fb; fb = fb->alllink) {
		for(i = 0; i < fb->cnt; i++) {
			f = &fb->fin[i];
			callback(f->fn, f->arg, f->ft, f->ot);
		}
	}
}

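// Ensure that the span s has been swept before returning,
// sweeping it here if no other goroutine has claimed it yet.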
void
runtime_MSpan_EnsureSwept(MSpan *s)
{
	M *m = runtime_m();
	G *g = runtime_g();
	uint32 sg;

	// Caller must disable preemption.
	// Otherwise, when this function returns, the span can become unswept again
	// (if GC is triggered on another goroutine).
	if(m->locks == 0 && m->mallocing == 0 && g != m->g0)
		runtime_throw("MSpan_EnsureSwept: m is not locked");

	sg = runtime_mheap.sweepgen;
	if(runtime_atomicload(&s->sweepgen) == sg)
		return;
	if(runtime_cas(&s->sweepgen, sg-2, sg-1)) {
		runtime_MSpan_Sweep(s);
		return;
	}
	// Unfortunate condition: someone else is sweeping the span and we
	// don't have an efficient means to wait, so spin until it finishes.
	while(runtime_atomicload(&s->sweepgen) != sg)
		runtime_osyield();
}

// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to the heap.
bool
runtime_MSpan_Sweep(MSpan *s)
{
	M *m;
	int32 cl, n, npages, nfree;
	uintptr size, off, *bitp, shift, bits;
	uint32 sweepgen;
	byte *p;
	MCache *c;
	byte *arena_start;
	MLink head, *end;
	byte *type_data;
	byte compression;
	uintptr type_data_inc;
	MLink *x;
	Special *special, **specialp, *y;
	bool res, sweepgenset;

	m = runtime_m();

	// It's critical that we enter this function with preemption disabled;
	// GC must not start while we are in the middle of this function.
	if(m->locks == 0 && m->mallocing == 0 && runtime_g() != m->g0)
		runtime_throw("MSpan_Sweep: m is not locked");
	sweepgen = runtime_mheap.sweepgen;
	if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
		runtime_printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
			s->state, s->sweepgen, sweepgen);
		runtime_throw("MSpan_Sweep: bad span state");
	}
	arena_start = runtime_mheap.arena_start;
	cl = s->sizeclass;
	size = s->elemsize;
	if(cl == 0) {
		n = 1;
	} else {
		// Chunk full of small blocks.
		npages = runtime_class_to_allocnpages[cl];
		n = (npages << PageShift) / size;
	}
	res = false;
	nfree = 0;
	end = &head;
	c = m->mcache;
	sweepgenset = false;

	// Mark any free objects in this span so we don't collect them.
	for(x = s->freelist; x != nil; x = x->next) {
		// This is markonly(x) but faster because we don't need
		// atomic access and we're guaranteed to be pointing at
		// the head of a valid object.
		off = (uintptr*)x - (uintptr*)runtime_mheap.arena_start;
		bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		*bitp |= bitMarked<<shift;
	}

	// Unlink & free special records for any objects we're about to free.
	specialp = &s->specials;
	special = *specialp;
	while(special != nil) {
		// A finalizer can be set for an inner byte of an object, find object beginning.
		p = (byte*)(s->start << PageShift) + special->offset/size*size;
		off = (uintptr*)p - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = *bitp>>shift;
		if((bits & (bitAllocated|bitMarked)) == bitAllocated) {
			// Find the exact byte for which the special was setup
			// (as opposed to object beginning).
			p = (byte*)(s->start << PageShift) + special->offset;
			// about to free object: splice out special record
			y = special;
			special = special->next;
			*specialp = special;
			if(!runtime_freespecial(y, p, size, false)) {
				// stop freeing of object if it has a finalizer
				*bitp |= bitMarked << shift;
			}
		} else {
			// object is still live: keep special record
			specialp = &special->next;
			special = *specialp;
		}
	}

	type_data = (byte*)s->types.data;
	type_data_inc = sizeof(uintptr);
	compression = s->types.compression;
	switch(compression) {
	case MTypes_Bytes:
		type_data += 8*sizeof(uintptr);
		type_data_inc = 1;
		break;
	}

	// Sweep through n objects of given size starting at p.
	// This thread owns the span now, so it can manipulate
	// the block bitmap without atomic operations.
	p = (byte*)(s->start << PageShift);
	for(; n > 0; n--, p += size, type_data+=type_data_inc) {
		off = (uintptr*)p - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = *bitp>>shift;

		if((bits & bitAllocated) == 0)
			continue;

		if((bits & bitMarked) != 0) {
			*bitp &= ~(bitMarked<<shift);
			continue;
		}

		if(runtime_debug.allocfreetrace)
			runtime_tracefree(p, size);

		// Clear mark and scan bits.
		*bitp &= ~((bitScan|bitMarked)<<shift);

		if(cl == 0) {
			// Free large span.
			runtime_unmarkspan(p, 1<<PageShift);
			s->needzero = 1;
			// important to set sweepgen before returning it to heap
			runtime_atomicstore(&s->sweepgen, sweepgen);
			sweepgenset = true;
			// See note about SysFault vs SysFree in malloc.goc.
			if(runtime_debug.efence)
				runtime_SysFault(p, size);
			else
				runtime_MHeap_Free(&runtime_mheap, s, 1);
			c->local_nlargefree++;
			c->local_largefree += size;
			runtime_xadd64(&mstats.next_gc, -(uint64)(size * (gcpercent + 100)/100));
			res = true;
		} else {
			// Free small object.
			switch(compression) {
			case MTypes_Words:
				*(uintptr*)type_data = 0;
				break;
			case MTypes_Bytes:
				*(byte*)type_data = 0;
				break;
			}
			if(size > 2*sizeof(uintptr))
				((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
			else if(size > sizeof(uintptr))
				((uintptr*)p)[1] = 0;

			end->next = (MLink*)p;
			end = (MLink*)p;
			nfree++;
		}
	}

	// We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
	// because of the potential for a concurrent free/SetFinalizer.
	// But we need to set it before we make the span available for allocation
	// (return it to heap or mcentral), because allocation code assumes that a
	// span is already swept if available for allocation.
	if(!sweepgenset && nfree == 0) {
		// The span must be in our exclusive ownership until we update sweepgen,
		// check for potential races.
		if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
			runtime_printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
				s->state, s->sweepgen, sweepgen);
			runtime_throw("MSpan_Sweep: bad span state after sweep");
		}
		runtime_atomicstore(&s->sweepgen, sweepgen);
	}
	if(nfree > 0) {
		c->local_nsmallfree[cl] += nfree;
		c->local_cachealloc -= nfree * size;
		runtime_xadd64(&mstats.next_gc, -(uint64)(nfree * size * (gcpercent + 100)/100));
		res = runtime_MCentral_FreeSpan(&runtime_mheap.central[cl], s, nfree, head.next, end);
		// MCentral_FreeSpan updates sweepgen.
	}
	return res;
}

// State of background sweep.
// Protected by gclock.
static struct
{
	G*	g;
	bool	parked;

	MSpan**	spans;
	uint32	nspan;
	uint32	spanidx;
} sweep;

// background sweeping goroutine
static void
bgsweep(void* dummy __attribute__ ((unused)))
{
	runtime_g()->issystem = 1;
	for(;;) {
		while(runtime_sweepone() != (uintptr)-1) {
			gcstats.nbgsweep++;
			runtime_gosched();
		}
		runtime_lock(&gclock);
		if(!runtime_mheap.sweepdone) {
			// This can happen if a GC ran in between runtime_sweepone
			// returning -1 and us acquiring gclock.
			runtime_unlock(&gclock);
			continue;
		}
		sweep.parked = true;
		runtime_g()->isbackground = true;
		runtime_parkunlock(&gclock, "GC sweep wait");
		runtime_g()->isbackground = false;
	}
}

// sweeps one span
// returns number of pages returned to heap, or -1 if there is nothing to sweep
uintptr
runtime_sweepone(void)
{
	M *m = runtime_m();
	MSpan *s;
	uint32 idx, sg;
	uintptr npages;

	// Increment locks to ensure that the goroutine is not preempted
	// in the middle of sweep, thus leaving the span in an inconsistent
	// state for the next GC.
	m->locks++;
	sg = runtime_mheap.sweepgen;
	for(;;) {
		idx = runtime_xadd(&sweep.spanidx, 1) - 1;
		if(idx >= sweep.nspan) {
			runtime_mheap.sweepdone = true;
			m->locks--;
			return (uintptr)-1;
		}
		s = sweep.spans[idx];
		if(s->state != MSpanInUse) {
			s->sweepgen = sg;
			continue;
		}
		if(s->sweepgen != sg-2 || !runtime_cas(&s->sweepgen, sg-2, sg-1))
			continue;
		if(s->incache)
			runtime_throw("sweep of incache span");
		npages = s->npages;
		if(!runtime_MSpan_Sweep(s))
			npages = 0;
		m->locks--;
		return npages;
	}
}

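// Print the contents of span allspans[idx] for debugging: one bracketed
// group per object, "(...)" for allocated objects and "[...]" for free ones.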
static void
dumpspan(uint32 idx)
{
	int32 sizeclass, n, npages, i, column;
	uintptr size;
	byte *p;
	byte *arena_start;
	MSpan *s;
	bool allocated;

	s = runtime_mheap.allspans[idx];
	if(s->state != MSpanInUse)
		return;
	arena_start = runtime_mheap.arena_start;
	p = (byte*)(s->start << PageShift);
	sizeclass = s->sizeclass;
	size = s->elemsize;
	if(sizeclass == 0) {
		n = 1;
	} else {
		npages = runtime_class_to_allocnpages[sizeclass];
		n = (npages << PageShift) / size;
	}

	runtime_printf("%p .. %p:\n", p, p+n*size);
	column = 0;
	for(; n>0; n--, p+=size) {
		uintptr off, *bitp, shift, bits;

		off = (uintptr*)p - (uintptr*)arena_start;
		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		bits = *bitp>>shift;

		allocated = ((bits & bitAllocated) != 0);

		for(i=0; (uint32)i<size; i+=sizeof(void*)) {
			if(column == 0) {
				runtime_printf("\t");
			}
			if(i == 0) {
				runtime_printf(allocated ? "(" : "[");
				runtime_printf("%p: ", p+i);
			} else {
				runtime_printf(" ");
			}

			runtime_printf("%p", *(void**)(p+i));

			if(i+sizeof(void*) >= size) {
				runtime_printf(allocated ? ") " : "] ");
			}

			column++;
			if(column == 8) {
				runtime_printf("\n");
				column = 0;
			}
		}
	}
	runtime_printf("\n");
}

// A debugging function to dump the contents of memory
void
runtime_memorydump(void)
{
	uint32 spanidx;

	for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) {
		dumpspan(spanidx);
	}
}

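// Entry point for GC helper threads: run the parallel mark over the roots
// and then help drain the shared work buffers.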
void
runtime_gchelper(void)
{
	uint32 nproc;

	runtime_m()->traceback = 2;
	gchelperstart();

	// parallel mark over the gc roots
	runtime_parfordo(work.markfor);

	// help other threads scan secondary blocks
	scanblock(nil, true);

	bufferList[runtime_m()->helpgc].busy = 0;
	nproc = work.nproc;	// work.nproc can change right after we increment work.ndone
	if(runtime_xadd(&work.ndone, +1) == nproc-1)
		runtime_notewakeup(&work.alldone);
	runtime_m()->traceback = 0;
}

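// Flush each P's cached allocation statistics into the global mstats.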
static void
cachestats(void)
{
	MCache *c;
	P *p, **pp;

	for(pp=runtime_allp; (p=*pp) != nil; pp++) {
		c = p->mcache;
		if(c==nil)
			continue;
		runtime_purgecachedstats(c);
	}
}

static void
flushallmcaches(void)
{
	P *p, **pp;
	MCache *c;

	// Flush MCache's to MCentral.
	for(pp=runtime_allp; (p=*pp) != nil; pp++) {
		c = p->mcache;
		if(c==nil)
			continue;
		runtime_MCache_ReleaseAll(c);
	}
}

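// Recompute the global memory statistics (mstats) from the heap and per-M
// state; if stats is non-nil, also accumulate per-M GC stats into *stats.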
void
runtime_updatememstats(GCStats *stats)
{
	M *mp;
	MSpan *s;
	uint32 i;
	uint64 stacks_inuse, smallfree;
	uint64 *src, *dst;

	if(stats)
		runtime_memclr((byte*)stats, sizeof(*stats));
	stacks_inuse = 0;
	for(mp=runtime_allm; mp; mp=mp->alllink) {
		//stacks_inuse += mp->stackinuse*FixedStack;
		if(stats) {
			src = (uint64*)&mp->gcstats;
			dst = (uint64*)stats;
			for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
				dst[i] += src[i];
			runtime_memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
		}
	}
	mstats.stacks_inuse = stacks_inuse;
	mstats.mcache_inuse = runtime_mheap.cachealloc.inuse;
	mstats.mspan_inuse = runtime_mheap.spanalloc.inuse;
	mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
		mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys;

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive objects in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.
	mstats.alloc = 0;
	mstats.total_alloc = 0;
	mstats.nmalloc = 0;
	mstats.nfree = 0;
	for(i = 0; i < nelem(mstats.by_size); i++) {
		mstats.by_size[i].nmalloc = 0;
		mstats.by_size[i].nfree = 0;
	}

	// Flush MCache's to MCentral.
	flushallmcaches();

	// Aggregate local stats.
	cachestats();

	// Scan all spans and count number of alive objects.
	for(i = 0; i < runtime_mheap.nspan; i++) {
		s = runtime_mheap.allspans[i];
		if(s->state != MSpanInUse)
			continue;
		if(s->sizeclass == 0) {
			mstats.nmalloc++;
			mstats.alloc += s->elemsize;
		} else {
			mstats.nmalloc += s->ref;
			mstats.by_size[s->sizeclass].nmalloc += s->ref;
			mstats.alloc += s->ref*s->elemsize;
		}
	}

	// Aggregate by size class.
	smallfree = 0;
	mstats.nfree = runtime_mheap.nlargefree;
	for(i = 0; i < nelem(mstats.by_size); i++) {
		mstats.nfree += runtime_mheap.nsmallfree[i];
		mstats.by_size[i].nfree = runtime_mheap.nsmallfree[i];
		mstats.by_size[i].nmalloc += runtime_mheap.nsmallfree[i];
		smallfree += runtime_mheap.nsmallfree[i] * runtime_class_to_size[i];
	}
	mstats.nmalloc += mstats.nfree;

	// Calculate derived stats.
	mstats.total_alloc = mstats.alloc + runtime_mheap.largefree + smallfree;
	mstats.heap_alloc = mstats.alloc;
	mstats.heap_objects = mstats.nmalloc - mstats.nfree;
}

// Structure of arguments passed to function gc().
// This allows the arguments to be passed via runtime_mcall.
struct gc_args
{
	int64 start_time; // start time of GC in ns (just before stoptheworld)
	bool eagersweep;
};

static void gc(struct gc_args *args);
static void mgc(G *gp);

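// Parse the GOGC environment variable: a percentage, "off" (-1), or 100 by default.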
static int32
readgogc(void)
{
	const byte *p;

	p = runtime_getenv("GOGC");
	if(p == nil || p[0] == '\0')
		return 100;
	if(runtime_strcmp((const char *)p, "off") == 0)
		return -1;
	return runtime_atoi(p);
}

// force = 1 - do GC regardless of current heap usage
// force = 2 - do GC and eager sweep
void
runtime_gc(int32 force)
{
	M *m;
	G *g;
	struct gc_args a;
	int32 i;

	// The atomic operations are not atomic if the uint64s
	// are not aligned on uint64 boundaries. This has been
	// a problem in the past.
	if((((uintptr)&work.empty) & 7) != 0)
		runtime_throw("runtime: gc work buffer is misaligned");
	if((((uintptr)&work.full) & 7) != 0)
		runtime_throw("runtime: gc work buffer is misaligned");

	// Make sure all registers are saved on stack so that
	// scanstack sees them.
	__builtin_unwind_init();

	// The gc is turned off (via enablegc) until
	// the bootstrap has completed.
	// Also, malloc gets called in the guts
	// of a number of libraries that might be
	// holding locks. To avoid priority inversion
	// problems, don't bother trying to run gc
	// while holding a lock. The next mallocgc
	// without a lock will do the gc instead.
	m = runtime_m();
	if(!mstats.enablegc || runtime_g() == m->g0 || m->locks > 0 || runtime_panicking)
		return;

	if(gcpercent == GcpercentUnknown) {	// first time through
		runtime_lock(&runtime_mheap);
		if(gcpercent == GcpercentUnknown)
			gcpercent = readgogc();
		runtime_unlock(&runtime_mheap);
	}
	if(gcpercent < 0)
		return;

	runtime_semacquire(&runtime_worldsema, false);
	if(force==0 && mstats.heap_alloc < mstats.next_gc) {
		// Typically threads which lost the race to grab
		// worldsema exit here when gc is done.
		runtime_semrelease(&runtime_worldsema);
		return;
	}

	// Ok, we're doing it!  Stop everybody else.
	a.start_time = runtime_nanotime();
	a.eagersweep = force >= 2;
	m->gcing = 1;
	runtime_stoptheworld();

	clearpools();

	// Run gc on the g0 stack. We do this so that the g stack
	// we're currently running on will no longer change. Cuts
	// the root set down a bit (g0 stacks are not scanned, and
	// we don't need to scan gc's internal state). Also an
	// enabler for copyable stacks.
	for(i = 0; i < (runtime_debug.gctrace > 1 ? 2 : 1); i++) {
		if(i > 0)
			a.start_time = runtime_nanotime();
		// switch to g0, call gc(&a), then switch back
		g = runtime_g();
		g->param = &a;
		g->status = Gwaiting;
		g->waitreason = "garbage collection";
		runtime_mcall(mgc);
		m = runtime_m();
	}

	// all done
	m->gcing = 0;
	m->locks++;
	runtime_semrelease(&runtime_worldsema);
	runtime_starttheworld();
	m->locks--;

	// now that gc is done, kick off finalizer thread if needed
	if(!ConcurrentSweep) {
		// give the queued finalizers, if any, a chance to run
		runtime_gosched();
	} else {
		// For gccgo, let other goroutines run.
		runtime_gosched();
	}
}

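// Called via runtime_mcall on the g0 stack: run gc() with the arguments
// stashed in gp->param, then resume the calling goroutine.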
static void
mgc(G *gp)
{
	gc(gp->param);
	gp->param = nil;
	gp->status = Grunning;
	runtime_gogo(gp);
}

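// The core of the collection, run on the g0 stack with the world stopped:
// finish outstanding sweeping, run the parallel mark, update statistics,
// and kick off the next sweep cycle.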
static void
gc(struct gc_args *args)
{
	M *m;
	int64 t0, t1, t2, t3, t4;
	uint64 heap0, heap1, obj, ninstr;
	GCStats stats;
	uint32 i;
	// Eface eface;

	m = runtime_m();

	if(runtime_debug.allocfreetrace)
		runtime_tracegc();

	m->traceback = 2;
	t0 = args->start_time;
	work.tstart = args->start_time;

	if(CollectStats)
		runtime_memclr((byte*)&gcstats, sizeof(gcstats));

	m->locks++;	// disable gc during mallocs in parforalloc
	if(work.markfor == nil)
		work.markfor = runtime_parforalloc(MaxGcproc);
	m->locks--;

	t1 = 0;
	if(runtime_debug.gctrace)
		t1 = runtime_nanotime();

	// Sweep whatever is not swept by bgsweep.
	while(runtime_sweepone() != (uintptr)-1)
		gcstats.npausesweep++;

	work.nwait = 0;
	work.ndone = 0;
	work.nproc = runtime_gcprocs();
	runtime_parforsetup(work.markfor, work.nproc, RootCount + runtime_allglen, nil, false, markroot);
	if(work.nproc > 1) {
		runtime_noteclear(&work.alldone);
		runtime_helpgc(work.nproc);
	}

	t2 = 0;
	if(runtime_debug.gctrace)
		t2 = runtime_nanotime();

	gchelperstart();
	runtime_parfordo(work.markfor);
	scanblock(nil, true);

	t3 = 0;
	if(runtime_debug.gctrace)
		t3 = runtime_nanotime();

	bufferList[m->helpgc].busy = 0;
	if(work.nproc > 1)
		runtime_notesleep(&work.alldone);

	cachestats();
	// next_gc calculation is tricky with concurrent sweep since we don't know size of live heap
	// estimate what was live heap size after previous GC (for tracing only)
	heap0 = mstats.next_gc*100/(gcpercent+100);
	// conservatively set next_gc to high value assuming that everything is live
	// concurrent/lazy sweep will reduce this number while discovering new garbage
	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;

	t4 = runtime_nanotime();
	mstats.last_gc = runtime_unixnanotime();	// must be Unix time to make sense to user
	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
	mstats.pause_total_ns += t4 - t0;
	mstats.numgc++;
	if(mstats.debuggc)
		runtime_printf("pause %D\n", t4-t0);

	if(runtime_debug.gctrace) {
		heap1 = mstats.heap_alloc;
		runtime_updatememstats(&stats);
		if(heap1 != mstats.heap_alloc) {
			runtime_printf("runtime: mstats skew: heap=%D/%D\n", heap1, mstats.heap_alloc);
			runtime_throw("mstats skew");
		}
		obj = mstats.nmalloc - mstats.nfree;

		stats.nprocyield += work.markfor->nprocyield;
		stats.nosyield += work.markfor->nosyield;
		stats.nsleep += work.markfor->nsleep;

		runtime_printf("gc%d(%d): %D+%D+%D+%D us, %D -> %D MB, %D (%D-%D) objects,"
				" %d/%d/%d sweeps,"
				" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
			mstats.numgc, work.nproc, (t1-t0)/1000, (t2-t1)/1000, (t3-t2)/1000, (t4-t3)/1000,
			heap0>>20, heap1>>20, obj,
			mstats.nmalloc, mstats.nfree,
			sweep.nspan, gcstats.nbgsweep, gcstats.npausesweep,
			stats.nhandoff, stats.nhandoffcnt,
			work.markfor->nsteal, work.markfor->nstealcnt,
			stats.nprocyield, stats.nosyield, stats.nsleep);
		gcstats.nbgsweep = gcstats.npausesweep = 0;
		if(CollectStats) {
			runtime_printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
				gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
			if(gcstats.ptr.cnt != 0)
				runtime_printf("avg ptrbufsize: %D (%D/%D)\n",
					gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
			if(gcstats.obj.cnt != 0)
				runtime_printf("avg nobj: %D (%D/%D)\n",
					gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
			runtime_printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);

			runtime_printf("instruction counts:\n");
			ninstr = 0;
			for(i=0; i<nelem(gcstats.instr); i++) {
				runtime_printf("\t%d:\t%D\n", i, gcstats.instr[i]);
				ninstr += gcstats.instr[i];
			}
			runtime_printf("\ttotal:\t%D\n", ninstr);

			runtime_printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);

			runtime_printf("markonly base lookup: bit %D word %D span %D\n", gcstats.markonly.foundbit, gcstats.markonly.foundword, gcstats.markonly.foundspan);
			runtime_printf("flushptrbuf base lookup: bit %D word %D span %D\n", gcstats.flushptrbuf.foundbit, gcstats.flushptrbuf.foundword, gcstats.flushptrbuf.foundspan);
		}
	}

	// We cache current runtime_mheap.allspans array in sweep.spans,
	// because the former can be resized and freed.
	// Otherwise we would need to take heap lock every time
	// we want to convert span index to span pointer.

	// Free the old cached array if necessary.
	if(sweep.spans && sweep.spans != runtime_mheap.allspans)
		runtime_SysFree(sweep.spans, sweep.nspan*sizeof(sweep.spans[0]), &mstats.other_sys);
	// Cache the current array.
	runtime_mheap.sweepspans = runtime_mheap.allspans;
	runtime_mheap.sweepgen += 2;
	runtime_mheap.sweepdone = false;
	sweep.spans = runtime_mheap.allspans;
	sweep.nspan = runtime_mheap.nspan;
	sweep.spanidx = 0;

	// Temporarily disable concurrent sweep, because we have seen failures on builders.
	if(ConcurrentSweep && !args->eagersweep) {
		runtime_lock(&gclock);
		if(sweep.g == nil)
			sweep.g = __go_go(bgsweep, nil);
		else if(sweep.parked) {
			sweep.parked = false;
			runtime_ready(sweep.g);
		}
		runtime_unlock(&gclock);
	} else {
		// Sweep all spans eagerly.
		while(runtime_sweepone() != (uintptr)-1)
			gcstats.npausesweep++;
		// Do an additional mProf_GC, because all 'free' events are now real as well.
		runtime_MProf_GC();
	}

	runtime_MProf_GC();

	m->traceback = 0;
}

extern uintptr runtime_sizeof_C_MStats
	__asm__ (GOSYM_PREFIX "runtime.Sizeof_C_MStats");

void runtime_ReadMemStats(MStats *)
	__asm__ (GOSYM_PREFIX "runtime.ReadMemStats");

void
runtime_ReadMemStats(MStats *stats)
{
	M *m;

	// Have to acquire worldsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	runtime_semacquire(&runtime_worldsema, false);
	m = runtime_m();
	m->gcing = 1;
	runtime_stoptheworld();
	runtime_updatememstats(nil);
	// The size of the trailing by_size array differs between Go and C
	// (NumSizeClasses was changed), but we cannot change the Go struct
	// because of backward compatibility.
	runtime_memmove(stats, &mstats, runtime_sizeof_C_MStats);
	m->gcing = 0;
	m->locks++;
	runtime_semrelease(&runtime_worldsema);
	runtime_starttheworld();
	m->locks--;
}

void runtime_debug_readGCStats(Slice*)
	__asm__("runtime_debug.readGCStats");

void
runtime_debug_readGCStats(Slice *pauses)
{
	uint64 *p;
	uint32 i, n;

	// Calling code in runtime/debug should make the slice large enough.
	if((size_t)pauses->cap < nelem(mstats.pause_ns)+3)
		runtime_throw("runtime: short slice passed to readGCStats");

	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
	p = (uint64*)pauses->array;
	runtime_lock(&runtime_mheap);
	n = mstats.numgc;
	if(n > nelem(mstats.pause_ns))
		n = nelem(mstats.pause_ns);

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	for(i=0; i<n; i++)
		p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];

	p[n] = mstats.last_gc;
	p[n+1] = mstats.numgc;
	p[n+2] = mstats.pause_total_ns;
	runtime_unlock(&runtime_mheap);
	pauses->__count = n+3;
}

int32
runtime_setgcpercent(int32 in) {
	int32 out;

	runtime_lock(&runtime_mheap);
	if(gcpercent == GcpercentUnknown)
		gcpercent = readgogc();
	out = gcpercent;
	if(in < 0)
		in = -1;
	gcpercent = in;
	runtime_unlock(&runtime_mheap);
	return out;
}

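// Sanity-check the state of a GC helper thread before it starts marking.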
static void
gchelperstart(void)
{
	M *m;

	m = runtime_m();
	if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
		runtime_throw("gchelperstart: bad m->helpgc");
	if(runtime_xchg(&bufferList[m->helpgc].busy, 1))
		runtime_throw("gchelperstart: already busy");
	if(runtime_g() != m->g0)
		runtime_throw("gchelper not running on g0 stack");
}

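// The finalizer goroutine: repeatedly take the queued finalizers off finq,
// convert each argument to the type the finalizer expects, and call it.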
static void
runfinq(void* dummy __attribute__ ((unused)))
{
	Finalizer *f;
	FinBlock *fb, *next;
	uint32 i;
	Eface ef;
	Iface iface;

	// This function blocks for long periods of time, and because it is written in C
	// we have no liveness information. Zero everything so that uninitialized pointers
	// do not cause memory leaks.
	f = nil;
	fb = nil;
	next = nil;
	i = 0;
	ef.__type_descriptor = nil;
	ef.__object = nil;

	// force flush to memory
	USED(&f);
	USED(&fb);
	USED(&next);
	USED(&i);
	USED(&ef);

	for(;;) {
		runtime_lock(&finlock);
		fb = finq;
		finq = nil;
		if(fb == nil) {
			runtime_fingwait = true;
			runtime_g()->isbackground = true;
			runtime_parkunlock(&finlock, "finalizer wait");
			runtime_g()->isbackground = false;
			continue;
		}
		runtime_unlock(&finlock);
		for(; fb; fb=next) {
			next = fb->next;
			for(i=0; i<(uint32)fb->cnt; i++) {
				const Type *fint;
				void *param;

				f = &fb->fin[i];
				fint = ((const Type**)f->ft->__in.array)[0];
				if((fint->__code & kindMask) == KindPtr) {
					// direct use of pointer
					param = &f->arg;
				} else if(((const InterfaceType*)fint)->__methods.__count == 0) {
					// convert to empty interface
					ef.__type_descriptor = (const Type*)f->ot;
					ef.__object = f->arg;
					param = &ef;
				} else {
					// convert to interface with methods
					iface.__methods = __go_convert_interface_2((const Type*)fint,
										   (const Type*)f->ot,
										   1);
					iface.__object = f->arg;
					if(iface.__methods == nil)
						runtime_throw("invalid type conversion in runfinq");
					param = &iface;
				}
				reflect_call(f->ft, f->fn, 0, 0, &param, nil);
				f->fn = nil;
				f->arg = nil;
				f->ot = nil;
			}
			fb->cnt = 0;
			runtime_lock(&finlock);
			fb->next = finc;
			finc = fb;
			runtime_unlock(&finlock);
		}

		// Zero everything that's dead, to avoid memory leaks.
		// See comment at top of function.
		f = nil;
		fb = nil;
		next = nil;
		i = 0;
		ef.__type_descriptor = nil;
		ef.__object = nil;
		runtime_gc(1);	// trigger another gc to clean up the finalized objects, if possible
	}
}

void
runtime_createfing(void)
{
	if(fing != nil)
		return;
	// Here we use gclock instead of finlock,
	// because newproc1 can allocate, which can cause on-demand span sweep,
	// which can queue finalizers, which would deadlock.
	runtime_lock(&gclock);
	if(fing == nil)
		fing = __go_go(runfinq, nil);
	runtime_unlock(&gclock);
}

G*
runtime_wakefing(void)
{
	G *res;

	res = nil;
	runtime_lock(&finlock);
	if(runtime_fingwait && runtime_fingwake) {
		runtime_fingwait = false;
		runtime_fingwake = false;
		res = fing;
	}
	runtime_unlock(&finlock);
	return res;
}

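// mark the block at v so the collector never frees it: clear its
// bitAllocated bit and record only a block boundary.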
void
runtime_marknogc(void *v)
{
	uintptr *b, off, shift;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	*b = (*b & ~(bitAllocated<<shift)) | bitBlockBoundary<<shift;
}

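// set the bitScan bit for the block at v so the collector scans it for pointers.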
void
runtime_markscan(void *v)
{
	uintptr *b, off, shift;

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	*b |= bitScan<<shift;
}

// mark the block at v as freed.
void
runtime_markfreed(void *v)
{
	uintptr *b, off, shift;

	if(0)
		runtime_printf("markfreed %p\n", v);

	if((byte*)v > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markfreed: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;
	*b = (*b & ~(bitMask<<shift)) | (bitAllocated<<shift);
}

// check that the block at v of size n is marked freed.
void
runtime_checkfreed(void *v, uintptr n)
{
	uintptr *b, bits, off, shift;

	if(!runtime_checking)
		return;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		return;	// not allocated, so okay

	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;	// word offset
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	bits = *b>>shift;
	if((bits & bitAllocated) != 0) {
		runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
			v, n, off, bits & bitMask);
		runtime_throw("checkfreed: not freed");
	}
}

// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
void
runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
{
	uintptr *b, *b0, off, shift, i, x;
	byte *p;

	if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("markspan: bad pointer");

	if(runtime_checking) {
		// bits should be all zero at the start
		off = (byte*)v + size - runtime_mheap.arena_start;
		b = (uintptr*)(runtime_mheap.arena_start - off/wordsPerBitmapWord);
		for(i = 0; i < size/PtrSize/wordsPerBitmapWord; i++) {
			if(b[i] != 0)
				runtime_throw("markspan: span bits not zero");
		}
	}

	p = v;
	if(leftover)	// mark a boundary just past end of last block too
		n++;

	b0 = nil;
	x = 0;
	for(; n-- > 0; p += size) {
		// Okay to use non-atomic ops here, because we control
		// the entire span, and each bitmap word has bits for only
		// one span, so no other goroutines are changing these
		// bitmap words.
		off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;	// word offset
		b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		if(b0 != b) {
			if(b0 != nil)
				*b0 = x;
			b0 = b;
			x = 0;
		}
		x |= bitAllocated<<shift;
	}
	*b0 = x;
}

// unmark the span of memory at v of length n bytes.
void
runtime_unmarkspan(void *v, uintptr n)
{
	uintptr *p, *b, off;

	if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
		runtime_throw("unmarkspan: bad pointer");

	p = v;
	off = p - (uintptr*)runtime_mheap.arena_start;	// word offset
	if(off % wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned pointer");
	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
	n /= PtrSize;
	if(n%wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned length");
	// Okay to use non-atomic ops here, because we control
	// the entire span, and each bitmap word has bits for only
	// one span, so no other goroutines are changing these
	// bitmap words.
	n /= wordsPerBitmapWord;
	while(n-- > 0)
		*b-- = 0;
}

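// Map any additional bitmap memory needed to cover the portion of the
// arena that is now in use.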
void
runtime_MHeap_MapBits(MHeap *h)
{
	size_t page_size;

	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	enum {
		bitmapChunk = 8192
	};
	uintptr n;

	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
	n = ROUND(n, bitmapChunk);
	n = ROUND(n, PageSize);
	page_size = getpagesize();
	n = ROUND(n, page_size);
	if(h->bitmap_mapped >= n)
		return;

	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped, h->arena_reserved, &mstats.gc_sys);
	h->bitmap_mapped = n;
}