alloc.nim

  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. # Low level allocator for Nim. Has been designed to support the GC.
  10. {.push profiler:off.}
  11. include osalloc
  12. import std/private/syslocks
  13. import std/sysatomics
  14. template track(op, address, size) =
  15. when defined(memTracker):
  16. memTrackerOp(op, address, size)
  17. # We manage *chunks* of memory. Each chunk is a multiple of the page size.
  18. # Each chunk starts at an address that is divisible by the page size.
  19. # Small chunks may be divided into smaller cells of reusable pointers to reduce the number of page allocations.
  20. # An allocation of a small pointer looks approximately like this
  21. #[
  22. alloc -> rawAlloc -> No free chunk available -> Request a new page from tlsf -> result = chunk.data --------------+
  23.               |                                                                                                    |
  24.               v                                                                                                    |
  25.           Free chunk available                                                                                     |
  26.               |                                                                                                    |
  27.               v                                                                                                    v
  28.     Fetch shared cells -> No free cells available -> Advance acc -> result = chunk.data + chunk.acc ------------> return
  29.   (may not add new cells)                                                                                ^
  30.               |                                                                                           |
  31.               v                                                                                           |
  32.     Free cells available -> result = chunk.freeList -> Advance chunk.freeList ---------------------------+
  33. ]#
  34. # so it is split into 3 paths, where the last path is preferred to prevent unnecessary allocations.
  35. #
  36. #
  37. # A deallocation of a small pointer then looks like this
  38. #[
  39. dealloc -> rawDealloc -> chunk.owner == addr(a) ----> This thread owns the chunk ----> The current chunk is active ----> Chunk is completely unused ----> Chunk references no foreign cells
  40.                                     |                                                   (Add cell into the current chunk)              |               Return the current chunk back to tlsf
  41.                                     |                               |                                 |                                 |                                 |
  42.                                     v                               v                                 v                                 v                                 v
  43.                    A different thread owns this chunk.    The current chunk is not active.    chunk.free was < size      Chunk references foreign cells, noop
  44.                     Add the cell to a.sharedFreeLists     Add the cell into the active chunk     Activate the chunk                  (end)
  45.                                   (end)                               (end)                           (end)
  46. ]#
  47. # So "true" deallocation is delayed for as long as possible in favor of reusing cells.
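#
# A minimal sketch (not part of this module) of how the three paths play out for
# one size class, assuming a fresh MemRegion and the alloc/dealloc procs defined
# further below:
#
#   var a: MemRegion
#   let p1 = alloc(a, 60)   # no suitable chunk yet -> rawAlloc requests a page from tlsf
#   let p2 = alloc(a, 60)   # chunk exists, freeList empty -> bump chunk.acc
#   dealloc(a, p1)          # the cell goes onto the chunk's freeList; the page is kept
#   let p3 = alloc(a, 60)   # freeList is non-empty -> p1's cell is handed out again
#
# The page itself is only returned to tlsf once every cell of the chunk has been
# freed and no foreign cells reference it.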
  48. const
  49. nimMinHeapPages {.intdefine.} = 128 # 0.5 MB
  50. SmallChunkSize = PageSize
  51. MaxFli = when sizeof(int) > 2: 30 else: 14
  52. MaxLog2Sli = 5 # 32, this cannot be increased without changing 'uint32'
  53. # everywhere!
  54. MaxSli = 1 shl MaxLog2Sli
  55. FliOffset = 6
  56. RealFli = MaxFli - FliOffset
  57. # size of chunks in last matrix bin
  58. MaxBigChunkSize = int(1'i32 shl MaxFli - 1'i32 shl (MaxFli-MaxLog2Sli-1))
  59. HugeChunkSize = MaxBigChunkSize + 1
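# A worked example of the constants above on a 64-bit target (sizeof(int) == 8,
# so MaxFli == 30): MaxBigChunkSize = 2^30 - 2^(30-5-1) = 1073741824 - 16777216
# = 1056964608 bytes (exactly 1008 MiB), and HugeChunkSize = 1056964609.
# Requests of at least HugeChunkSize bypass the TLSF matrix entirely and are
# served by getHugeChunk/freeHugeChunk straight from the OS.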
  60. type
  61. PTrunk = ptr Trunk
  62. Trunk = object
  63. next: PTrunk # all nodes are connected with this pointer
  64. key: int # start address at bit 0
  65. bits: array[0..IntsPerTrunk-1, uint] # a bit vector
  66. TrunkBuckets = array[0..255, PTrunk]
  67. IntSet = object
  68. data: TrunkBuckets
  69. # ------------- chunk table ---------------------------------------------------
  70. # We use a PtrSet of chunk starts and a table[Page, chunksize] for chunk
  71. # endings of big chunks. This is needed by the merging operation. The only
  72. # remaining operation is best-fit for big chunks. Since there is a size-limit
  73. # for big chunks (because greater than the limit means they are returned back
  74. # to the OS), a fixed size array can be used.
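# A sketch of the bookkeeping, assuming the usual 4 KiB pages (PageShift == 12):
# a chunk starting at address 0x7f3a_4000_0000 has
# pageIndex == 0x7f3a_4000_0000 shr 12 == 0x7f3a40000. `incl(a, a.chunkStarts, idx)`
# records that page index, `excl` removes it, and `idx in a.chunkStarts` is the
# membership test that isAccessible uses further below.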
  75. type
  76. PLLChunk = ptr LLChunk
  77. LLChunk = object ## *low-level* chunk
  78. size: int # remaining size
  79. acc: int # accumulator
  80. next: PLLChunk # next low-level chunk; only needed for dealloc
  81. PAvlNode = ptr AvlNode
  82. AvlNode = object
  83. link: array[0..1, PAvlNode] # Left (0) and right (1) links
  84. key, upperBound: int
  85. level: int
  86. const
  87. RegionHasLock = false # hasThreadSupport and defined(gcDestructors)
  88. type
  89. FreeCell {.final, pure.} = object
  90. # A free cell is a pointer that has been freed, meaning it became available for reuse.
  91. # It may become foreign if it is lent to a chunk that did not create it; doing so reduces the number of pages needed.
  92. next: ptr FreeCell # next free cell in chunk (overlaid with refcount)
  93. when not defined(gcDestructors):
  94. zeroField: int # 0 means cell is not used (overlaid with typ field)
  95. # 1 means cell is manually managed pointer
  96. # otherwise a PNimType is stored in there
  97. else:
  98. alignment: int
  99. PChunk = ptr BaseChunk
  100. PBigChunk = ptr BigChunk
  101. PSmallChunk = ptr SmallChunk
  102. BaseChunk {.pure, inheritable.} = object
  103. prevSize: int # size of previous chunk; for coalescing
  104. # 0th bit == 1 if 'used'
  105. size: int # if < PageSize it is a small chunk
  106. owner: ptr MemRegion
  107. SmallChunk = object of BaseChunk
  108. next, prev: PSmallChunk # chunks of the same size
  109. freeList: ptr FreeCell # Singly linked list of cells. They may be from foreign chunks or from the current chunk.
  110. # Should be `nil` when the chunk isn't active in `a.freeSmallChunks`.
  111. free: int32 # Bytes this chunk is able to provide using both the accumulator and free cells.
  112. # When a cell is considered foreign, its source chunk's free field is NOT adjusted until it
  113. # reaches dealloc while the source chunk is active.
  114. # Instead, the receiving chunk gains the capacity and thus reserves space in the foreign chunk.
  115. acc: uint32 # Offset from data, used when there are no free cells available but the chunk is considered free.
  116. foreignCells: int # When a free cell is given to a chunk that is not its origin,
  117. # both the cell and the source chunk are considered foreign.
  118. # Receiving a foreign cell can happen both when deallocating from another thread or when
  119. # the active chunk in `a.freeSmallChunks` is not the current chunk.
  120. # Freeing a chunk while `foreignCells > 0` leaks memory as all references to it become lost.
  121. data {.align: MemAlign.}: UncheckedArray[byte] # start of usable memory
  122. BigChunk = object of BaseChunk # not necessarily > PageSize!
  123. next, prev: PBigChunk # chunks of the same (or bigger) size
  124. data {.align: MemAlign.}: UncheckedArray[byte] # start of usable memory
  125. HeapLinks = object
  126. len: int
  127. chunks: array[30, (PBigChunk, int)]
  128. next: ptr HeapLinks
  129. MemRegion = object
  130. when not defined(gcDestructors):
  131. minLargeObj, maxLargeObj: int
  132. freeSmallChunks: array[0..max(1, SmallChunkSize div MemAlign-1), PSmallChunk]
  133. # List of available chunks per size class. Only one is expected to be active per class.
  134. when defined(gcDestructors):
  135. sharedFreeLists: array[0..max(1, SmallChunkSize div MemAlign-1), ptr FreeCell]
  136. # When a thread frees a pointer it did not create, it must not adjust the counters.
  137. # Instead, the cell is placed here and deferred until the next allocation (see the sketch after this type section).
  138. flBitmap: uint32
  139. slBitmap: array[RealFli, uint32]
  140. matrix: array[RealFli, array[MaxSli, PBigChunk]]
  141. llmem: PLLChunk
  142. currMem, maxMem, freeMem, occ: int # memory sizes (allocated from OS)
  143. lastSize: int # needed for the case that OS gives us pages linearly
  144. when RegionHasLock:
  145. lock: SysLock
  146. when defined(gcDestructors):
  147. sharedFreeListBigChunks: PBigChunk # make no attempt at avoiding false sharing for now for this object field
  148. chunkStarts: IntSet
  149. when not defined(gcDestructors):
  150. root, deleted, last, freeAvlNodes: PAvlNode
  151. lockActive, locked, blockChunkSizeIncrease: bool # if locked, we cannot free pages.
  152. nextChunkSize: int
  153. when not defined(gcDestructors):
  154. bottomData: AvlNode
  155. heapLinks: HeapLinks
  156. when defined(nimTypeNames):
  157. allocCounter, deallocCounter: int
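# A minimal sketch (not part of this module) of the cross-thread path described
# at `sharedFreeLists` above, assuming gcDestructors and two threads with their
# own regions (regionA/regionB are illustrative names; in reality each thread
# uses its own per-thread allocator instance):
#
#   let p = alloc(regionA, 24)    # thread A owns the chunk behind `p`
#   # ... `p` is handed over to thread B ...
#   dealloc(regionB, p)           # B is not the owner: rawDealloc pushes the cell onto
#                                 # regionA.sharedFreeLists[<size class>] via
#                                 # addToSharedFreeList and touches no counters
#   discard alloc(regionA, 24)    # A's next allocation of this size class steals the
#                                 # shared list (fetchSharedCells); compensateCounters
#                                 # then fixes up `free`, `occ` and `foreignCells`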
  158. template smallChunkOverhead(): untyped = sizeof(SmallChunk)
  159. template bigChunkOverhead(): untyped = sizeof(BigChunk)
  160. when hasThreadSupport:
  161. template loada(x: untyped): untyped = atomicLoadN(unsafeAddr x, ATOMIC_RELAXED)
  162. template storea(x, y: untyped) = atomicStoreN(unsafeAddr x, y, ATOMIC_RELAXED)
  163. when false:
  164. # not yet required
  165. template atomicStatDec(x, diff: untyped) = discard atomicSubFetch(unsafeAddr x, diff, ATOMIC_RELAXED)
  166. template atomicStatInc(x, diff: untyped) = discard atomicAddFetch(unsafeAddr x, diff, ATOMIC_RELAXED)
  167. else:
  168. template loada(x: untyped): untyped = x
  169. template storea(x, y: untyped) = x = y
  170. template atomicStatDec(x, diff: untyped) = dec x, diff
  171. template atomicStatInc(x, diff: untyped) = inc x, diff
  172. const
  173. fsLookupTable: array[byte, int8] = [
  174. -1'i8, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
  175. 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  176. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  177. 5, 5, 5, 5, 5, 5, 5, 5,
  178. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  179. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  180. 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  181. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  182. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  183. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  184. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  185. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  186. 7, 7, 7, 7, 7, 7, 7, 7
  187. ]
  188. proc msbit(x: uint32): int {.inline.} =
  189. let a = if x <= 0xff_ff'u32:
  190. (if x <= 0xff: 0 else: 8)
  191. else:
  192. (if x <= 0xff_ff_ff'u32: 16 else: 24)
  193. result = int(fsLookupTable[byte(x shr a)]) + a
  194. proc lsbit(x: uint32): int {.inline.} =
  195. msbit(x and ((not x) + 1))
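# For example: msbit(0x1234'u32) -> x <= 0xffff so a == 8, fsLookupTable[0x12] == 4,
# result == 12 (0x1234 == 4660 and 2^12 <= 4660 < 2^13). lsbit first isolates the
# lowest set bit: lsbit(0x1230'u32) -> 0x1230 and 0xffff_edd0 == 0x10, msbit(0x10) == 4.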
  196. proc setBit(nr: int; dest: var uint32) {.inline.} =
  197. dest = dest or (1u32 shl (nr and 0x1f))
  198. proc clearBit(nr: int; dest: var uint32) {.inline.} =
  199. dest = dest and not (1u32 shl (nr and 0x1f))
  200. proc mappingSearch(r, fl, sl: var int) {.inline.} =
  201. #let t = (1 shl (msbit(uint32 r) - MaxLog2Sli)) - 1
  202. # This diverges from the standard TLSF algorithm because we need to ensure
  203. # PageSize alignment:
  204. let t = roundup((1 shl (msbit(uint32 r) - MaxLog2Sli)), PageSize) - 1
  205. r = r + t
  206. r = r and not t
  207. r = min(r, MaxBigChunkSize).int
  208. fl = msbit(uint32 r)
  209. sl = (r shr (fl - MaxLog2Sli)) - MaxSli
  210. dec fl, FliOffset
  211. sysAssert((r and PageMask) == 0, "mappingSearch: still not aligned")
  212. # See http://www.gii.upv.es/tlsf/files/papers/tlsf_desc.pdf for details of
  213. # this algorithm.
  214. proc mappingInsert(r: int): tuple[fl, sl: int] {.inline.} =
  215. sysAssert((r and PageMask) == 0, "mappingInsert: still not aligned")
  216. result.fl = msbit(uint32 r)
  217. result.sl = (r shr (result.fl - MaxLog2Sli)) - MaxSli
  218. dec result.fl, FliOffset
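# Worked example, assuming 4 KiB pages and the constants above (MaxLog2Sli == 5,
# FliOffset == 6): for a request rounded to r == 3*PageSize == 12288,
# mappingInsert yields fl == msbit(12288) - FliOffset == 13 - 6 == 7 and
# sl == (12288 shr (13 - 5)) - 32 == 48 - 32 == 16, i.e. such a chunk lives in
# a.matrix[7][16]. mappingSearch first rounds the request up to a multiple of
# t+1 == 4096 (12288 is already page aligned, so it stays unchanged) and then
# applies the same fl/sl computation.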
  219. template mat(): untyped = a.matrix[fl][sl]
  220. proc findSuitableBlock(a: MemRegion; fl, sl: var int): PBigChunk {.inline.} =
  221. let tmp = a.slBitmap[fl] and (not 0u32 shl sl)
  222. result = nil
  223. if tmp != 0:
  224. sl = lsbit(tmp)
  225. result = mat()
  226. else:
  227. fl = lsbit(a.flBitmap and (not 0u32 shl (fl + 1)))
  228. if fl > 0:
  229. sl = lsbit(a.slBitmap[fl])
  230. result = mat()
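# Example of the two-level search, as a sketch: with fl == 7, sl == 16 and
# a.slBitmap[7] == 0b10 (only size class sl == 1 populated), the mask
# `not 0u32 shl 16` clears every bit below 16, so tmp becomes 0 and the search
# cascades: fl is advanced to the next set bit of a.flBitmap above 7 and sl
# becomes the lowest set bit of a.slBitmap[fl], i.e. the smallest free chunk
# that is guaranteed to be large enough.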
  231. template clearBits(sl, fl) =
  232. clearBit(sl, a.slBitmap[fl])
  233. if a.slBitmap[fl] == 0u32:
  234. # do not forget to cascade:
  235. clearBit(fl, a.flBitmap)
  236. proc removeChunkFromMatrix(a: var MemRegion; b: PBigChunk) =
  237. let (fl, sl) = mappingInsert(b.size)
  238. if b.next != nil: b.next.prev = b.prev
  239. if b.prev != nil: b.prev.next = b.next
  240. if mat() == b:
  241. mat() = b.next
  242. if mat() == nil:
  243. clearBits(sl, fl)
  244. b.prev = nil
  245. b.next = nil
  246. proc removeChunkFromMatrix2(a: var MemRegion; b: PBigChunk; fl, sl: int) =
  247. mat() = b.next
  248. if mat() != nil:
  249. mat().prev = nil
  250. else:
  251. clearBits(sl, fl)
  252. b.prev = nil
  253. b.next = nil
  254. proc addChunkToMatrix(a: var MemRegion; b: PBigChunk) =
  255. let (fl, sl) = mappingInsert(b.size)
  256. b.prev = nil
  257. b.next = mat()
  258. if mat() != nil:
  259. mat().prev = b
  260. mat() = b
  261. setBit(sl, a.slBitmap[fl])
  262. setBit(fl, a.flBitmap)
  263. proc incCurrMem(a: var MemRegion, bytes: int) {.inline.} =
  264. atomicStatInc(a.currMem, bytes)
  265. proc decCurrMem(a: var MemRegion, bytes: int) {.inline.} =
  266. a.maxMem = max(a.maxMem, a.currMem)
  267. atomicStatDec(a.currMem, bytes)
  268. proc getMaxMem(a: var MemRegion): int =
  269. # Since a.maxMem is only updated when pages are freed,
  270. # it may not be up to date. Thus we use the
  271. # maximum of both values here:
  272. result = max(a.currMem, a.maxMem)
  273. const nimMaxHeap {.intdefine.} = 0
  274. proc allocPages(a: var MemRegion, size: int): pointer =
  275. when nimMaxHeap != 0:
  276. if a.occ + size > nimMaxHeap * 1024 * 1024:
  277. raiseOutOfMem()
  278. osAllocPages(size)
  279. proc tryAllocPages(a: var MemRegion, size: int): pointer =
  280. when nimMaxHeap != 0:
  281. if a.occ + size > nimMaxHeap * 1024 * 1024:
  282. raiseOutOfMem()
  283. osTryAllocPages(size)
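# nimMaxHeap is an {.intdefine.} constant, so the cap can be set at compile
# time; for example `nim c -d:nimMaxHeap=512 app.nim` (app.nim being whatever
# program is compiled) makes the allocator raise out-of-memory once more than
# 512 MiB would be occupied. The default of 0 means no limit.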
  284. proc llAlloc(a: var MemRegion, size: int): pointer =
  285. # *low-level* alloc for the memory manager's data structures. Deallocation
  286. # is done at the end of the allocator's life time.
  287. if a.llmem == nil or size > a.llmem.size:
  288. # the requested size is ``roundup(size+sizeof(LLChunk), PageSize)``, but
  289. # since we know ``size`` is a (small) constant, we know the requested size
  290. # is one page:
  291. sysAssert roundup(size+sizeof(LLChunk), PageSize) == PageSize, "roundup 6"
  292. var old = a.llmem # can be nil and is correct with nil
  293. a.llmem = cast[PLLChunk](allocPages(a, PageSize))
  294. when defined(nimAvlcorruption):
  295. trackLocation(a.llmem, PageSize)
  296. incCurrMem(a, PageSize)
  297. a.llmem.size = PageSize - sizeof(LLChunk)
  298. a.llmem.acc = sizeof(LLChunk)
  299. a.llmem.next = old
  300. result = cast[pointer](cast[int](a.llmem) + a.llmem.acc)
  301. dec(a.llmem.size, size)
  302. inc(a.llmem.acc, size)
  303. zeroMem(result, size)
  304. when not defined(gcDestructors):
  305. proc getBottom(a: var MemRegion): PAvlNode =
  306. result = addr(a.bottomData)
  307. if result.link[0] == nil:
  308. result.link[0] = result
  309. result.link[1] = result
  310. proc allocAvlNode(a: var MemRegion, key, upperBound: int): PAvlNode =
  311. if a.freeAvlNodes != nil:
  312. result = a.freeAvlNodes
  313. a.freeAvlNodes = a.freeAvlNodes.link[0]
  314. else:
  315. result = cast[PAvlNode](llAlloc(a, sizeof(AvlNode)))
  316. when defined(nimAvlcorruption):
  317. cprintf("tracking location: %p\n", result)
  318. result.key = key
  319. result.upperBound = upperBound
  320. let bottom = getBottom(a)
  321. result.link[0] = bottom
  322. result.link[1] = bottom
  323. result.level = 1
  324. #when defined(nimAvlcorruption):
  325. # track("allocAvlNode", result, sizeof(AvlNode))
  326. sysAssert(bottom == addr(a.bottomData), "bottom data")
  327. sysAssert(bottom.link[0] == bottom, "bottom link[0]")
  328. sysAssert(bottom.link[1] == bottom, "bottom link[1]")
  329. proc deallocAvlNode(a: var MemRegion, n: PAvlNode) {.inline.} =
  330. n.link[0] = a.freeAvlNodes
  331. a.freeAvlNodes = n
  332. proc addHeapLink(a: var MemRegion; p: PBigChunk, size: int): ptr HeapLinks =
  333. var it = addr(a.heapLinks)
  334. while it != nil and it.len >= it.chunks.len: it = it.next
  335. if it == nil:
  336. var n = cast[ptr HeapLinks](llAlloc(a, sizeof(HeapLinks)))
  337. n.next = a.heapLinks.next
  338. a.heapLinks.next = n
  339. n.chunks[0] = (p, size)
  340. n.len = 1
  341. result = n
  342. else:
  343. let L = it.len
  344. it.chunks[L] = (p, size)
  345. inc it.len
  346. result = it
  347. when not defined(gcDestructors):
  348. include "system/avltree"
  349. proc llDeallocAll(a: var MemRegion) =
  350. var it = a.llmem
  351. while it != nil:
  352. # we know each block in the list has the size of 1 page:
  353. var next = it.next
  354. osDeallocPages(it, PageSize)
  355. it = next
  356. a.llmem = nil
  357. proc intSetGet(t: IntSet, key: int): PTrunk =
  358. var it = t.data[key and high(t.data)]
  359. while it != nil:
  360. if it.key == key: return it
  361. it = it.next
  362. result = nil
  363. proc intSetPut(a: var MemRegion, t: var IntSet, key: int): PTrunk =
  364. result = intSetGet(t, key)
  365. if result == nil:
  366. result = cast[PTrunk](llAlloc(a, sizeof(result[])))
  367. result.next = t.data[key and high(t.data)]
  368. t.data[key and high(t.data)] = result
  369. result.key = key
  370. proc contains(s: IntSet, key: int): bool =
  371. var t = intSetGet(s, key shr TrunkShift)
  372. if t != nil:
  373. var u = key and TrunkMask
  374. result = (t.bits[u shr IntShift] and (uint(1) shl (u and IntMask))) != 0
  375. else:
  376. result = false
  377. proc incl(a: var MemRegion, s: var IntSet, key: int) =
  378. var t = intSetPut(a, s, key shr TrunkShift)
  379. var u = key and TrunkMask
  380. t.bits[u shr IntShift] = t.bits[u shr IntShift] or (uint(1) shl (u and IntMask))
  381. proc excl(s: var IntSet, key: int) =
  382. var t = intSetGet(s, key shr TrunkShift)
  383. if t != nil:
  384. var u = key and TrunkMask
  385. t.bits[u shr IntShift] = t.bits[u shr IntShift] and not
  386. (uint(1) shl (u and IntMask))
  387. iterator elements(t: IntSet): int {.inline.} =
  388. # while traversing it is forbidden to change the set!
  389. for h in 0..high(t.data):
  390. var r = t.data[h]
  391. while r != nil:
  392. var i = 0
  393. while i <= high(r.bits):
  394. var w = r.bits[i] # taking a copy of r.bits[i] here is correct, because
  395. # modifying operations are not allowed during traversal
  396. var j = 0
  397. while w != 0: # test all remaining bits for zero
  398. if (w and 1) != 0: # the bit is set!
  399. yield (r.key shl TrunkShift) or (i shl IntShift +% j)
  400. inc(j)
  401. w = w shr 1
  402. inc(i)
  403. r = r.next
  404. proc isSmallChunk(c: PChunk): bool {.inline.} =
  405. result = c.size <= SmallChunkSize-smallChunkOverhead()
  406. proc chunkUnused(c: PChunk): bool {.inline.} =
  407. result = (c.prevSize and 1) == 0
  408. iterator allObjects(m: var MemRegion): pointer {.inline.} =
  409. m.locked = true
  410. for s in elements(m.chunkStarts):
  411. # we need to check here again as it could have been modified:
  412. if s in m.chunkStarts:
  413. let c = cast[PChunk](s shl PageShift)
  414. if not chunkUnused(c):
  415. if isSmallChunk(c):
  416. var c = cast[PSmallChunk](c)
  417. let size = c.size
  418. var a = cast[int](addr(c.data))
  419. let limit = a + c.acc.int
  420. while a <% limit:
  421. yield cast[pointer](a)
  422. a = a +% size
  423. else:
  424. let c = cast[PBigChunk](c)
  425. yield addr(c.data)
  426. m.locked = false
  427. proc iterToProc*(iter: typed, envType: typedesc; procName: untyped) {.
  428. magic: "Plugin", compileTime.}
  429. when not defined(gcDestructors):
  430. proc isCell(p: pointer): bool {.inline.} =
  431. result = cast[ptr FreeCell](p).zeroField >% 1
  432. # ------------- chunk management ----------------------------------------------
  433. proc pageIndex(c: PChunk): int {.inline.} =
  434. result = cast[int](c) shr PageShift
  435. proc pageIndex(p: pointer): int {.inline.} =
  436. result = cast[int](p) shr PageShift
  437. proc pageAddr(p: pointer): PChunk {.inline.} =
  438. result = cast[PChunk](cast[int](p) and not PageMask)
  439. #sysAssert(Contains(allocator.chunkStarts, pageIndex(result)))
  440. when false:
  441. proc writeFreeList(a: MemRegion) =
  442. var it = a.freeChunksList
  443. c_fprintf(stdout, "freeChunksList: %p\n", it)
  444. while it != nil:
  445. c_fprintf(stdout, "it: %p, next: %p, prev: %p, size: %ld\n",
  446. it, it.next, it.prev, it.size)
  447. it = it.next
  448. proc requestOsChunks(a: var MemRegion, size: int): PBigChunk =
  449. when not defined(emscripten):
  450. if not a.blockChunkSizeIncrease:
  451. let usedMem = a.occ #a.currMem # - a.freeMem
  452. if usedMem < 64 * 1024:
  453. a.nextChunkSize = PageSize*4
  454. else:
  455. a.nextChunkSize = min(roundup(usedMem shr 2, PageSize), a.nextChunkSize * 2)
  456. a.nextChunkSize = min(a.nextChunkSize, MaxBigChunkSize).int
  457. var size = size
  458. if size > a.nextChunkSize:
  459. result = cast[PBigChunk](allocPages(a, size))
  460. else:
  461. result = cast[PBigChunk](tryAllocPages(a, a.nextChunkSize))
  462. if result == nil:
  463. result = cast[PBigChunk](allocPages(a, size))
  464. a.blockChunkSizeIncrease = true
  465. else:
  466. size = a.nextChunkSize
  467. incCurrMem(a, size)
  468. inc(a.freeMem, size)
  469. let heapLink = a.addHeapLink(result, size)
  470. when defined(debugHeapLinks):
  471. cprintf("owner: %p; result: %p; next pointer %p; size: %ld\n", addr(a),
  472. result, heapLink, size)
  473. when defined(memtracker):
  474. trackLocation(addr result.size, sizeof(int))
  475. sysAssert((cast[int](result) and PageMask) == 0, "requestOsChunks 1")
  476. #zeroMem(result, size)
  477. result.next = nil
  478. result.prev = nil
  479. result.size = size
  480. # update next.prevSize:
  481. var nxt = cast[int](result) +% size
  482. sysAssert((nxt and PageMask) == 0, "requestOsChunks 2")
  483. var next = cast[PChunk](nxt)
  484. if pageIndex(next) in a.chunkStarts:
  485. #echo("Next already allocated!")
  486. next.prevSize = size or (next.prevSize and 1)
  487. # set result.prevSize:
  488. var lastSize = if a.lastSize != 0: a.lastSize else: PageSize
  489. var prv = cast[int](result) -% lastSize
  490. sysAssert((nxt and PageMask) == 0, "requestOsChunks 3")
  491. var prev = cast[PChunk](prv)
  492. if pageIndex(prev) in a.chunkStarts and prev.size == lastSize:
  493. #echo("Prev already allocated!")
  494. result.prevSize = lastSize or (result.prevSize and 1)
  495. else:
  496. result.prevSize = 0 or (result.prevSize and 1) # unknown
  497. # but do not overwrite 'used' field
  498. a.lastSize = size # for next request
  499. sysAssert((cast[int](result) and PageMask) == 0, "requestOschunks: unaligned chunk")
  500. proc isAccessible(a: MemRegion, p: pointer): bool {.inline.} =
  501. result = contains(a.chunkStarts, pageIndex(p))
  502. proc contains[T](list, x: T): bool =
  503. var it = list
  504. while it != nil:
  505. if it == x: return true
  506. it = it.next
  507. proc listAdd[T](head: var T, c: T) {.inline.} =
  508. sysAssert(c notin head, "listAdd 1")
  509. sysAssert c.prev == nil, "listAdd 2"
  510. sysAssert c.next == nil, "listAdd 3"
  511. c.next = head
  512. if head != nil:
  513. sysAssert head.prev == nil, "listAdd 4"
  514. head.prev = c
  515. head = c
  516. proc listRemove[T](head: var T, c: T) {.inline.} =
  517. sysAssert(c in head, "listRemove")
  518. if c == head:
  519. head = c.next
  520. sysAssert c.prev == nil, "listRemove 2"
  521. if head != nil: head.prev = nil
  522. else:
  523. sysAssert c.prev != nil, "listRemove 3"
  524. c.prev.next = c.next
  525. if c.next != nil: c.next.prev = c.prev
  526. c.next = nil
  527. c.prev = nil
  528. proc updatePrevSize(a: var MemRegion, c: PBigChunk,
  529. prevSize: int) {.inline.} =
  530. var ri = cast[PChunk](cast[int](c) +% c.size)
  531. sysAssert((cast[int](ri) and PageMask) == 0, "updatePrevSize")
  532. if isAccessible(a, ri):
  533. ri.prevSize = prevSize or (ri.prevSize and 1)
  534. proc splitChunk2(a: var MemRegion, c: PBigChunk, size: int): PBigChunk =
  535. result = cast[PBigChunk](cast[int](c) +% size)
  536. result.size = c.size - size
  537. track("result.size", addr result.size, sizeof(int))
  538. when not defined(nimOptimizedSplitChunk):
  539. # still active because of weird codegen issue on some of our CIs:
  540. result.next = nil
  541. result.prev = nil
  542. # size and not used:
  543. result.prevSize = size
  544. result.owner = addr a
  545. sysAssert((size and 1) == 0, "splitChunk 2")
  546. sysAssert((size and PageMask) == 0,
  547. "splitChunk: size is not a multiple of the PageSize")
  548. updatePrevSize(a, c, result.size)
  549. c.size = size
  550. incl(a, a.chunkStarts, pageIndex(result))
  551. proc splitChunk(a: var MemRegion, c: PBigChunk, size: int) =
  552. let rest = splitChunk2(a, c, size)
  553. addChunkToMatrix(a, rest)
  554. proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
  555. var c = c
  556. sysAssert(c.size >= PageSize, "freeBigChunk")
  557. inc(a.freeMem, c.size)
  558. c.prevSize = c.prevSize and not 1 # set 'used' to false
  559. when coalescLeft:
  560. let prevSize = c.prevSize
  561. if prevSize != 0:
  562. var le = cast[PChunk](cast[int](c) -% prevSize)
  563. sysAssert((cast[int](le) and PageMask) == 0, "freeBigChunk 4")
  564. if isAccessible(a, le) and chunkUnused(le):
  565. sysAssert(not isSmallChunk(le), "freeBigChunk 5")
  566. if not isSmallChunk(le) and le.size < MaxBigChunkSize:
  567. removeChunkFromMatrix(a, cast[PBigChunk](le))
  568. inc(le.size, c.size)
  569. excl(a.chunkStarts, pageIndex(c))
  570. c = cast[PBigChunk](le)
  571. if c.size > MaxBigChunkSize:
  572. let rest = splitChunk2(a, c, MaxBigChunkSize)
  573. when defined(nimOptimizedSplitChunk):
  574. rest.next = nil
  575. rest.prev = nil
  576. addChunkToMatrix(a, c)
  577. c = rest
  578. when coalescRight:
  579. var ri = cast[PChunk](cast[int](c) +% c.size)
  580. sysAssert((cast[int](ri) and PageMask) == 0, "freeBigChunk 2")
  581. if isAccessible(a, ri) and chunkUnused(ri):
  582. sysAssert(not isSmallChunk(ri), "freeBigChunk 3")
  583. if not isSmallChunk(ri) and c.size < MaxBigChunkSize:
  584. removeChunkFromMatrix(a, cast[PBigChunk](ri))
  585. inc(c.size, ri.size)
  586. excl(a.chunkStarts, pageIndex(ri))
  587. if c.size > MaxBigChunkSize:
  588. let rest = splitChunk2(a, c, MaxBigChunkSize)
  589. addChunkToMatrix(a, rest)
  590. addChunkToMatrix(a, c)
  591. proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
  592. sysAssert(size > 0, "getBigChunk 2")
  593. var size = size # roundup(size, PageSize)
  594. var fl = 0
  595. var sl = 0
  596. mappingSearch(size, fl, sl)
  597. sysAssert((size and PageMask) == 0, "getBigChunk: unaligned chunk")
  598. result = findSuitableBlock(a, fl, sl)
  599. when RegionHasLock:
  600. if not a.lockActive:
  601. a.lockActive = true
  602. initSysLock(a.lock)
  603. acquireSys a.lock
  604. if result == nil:
  605. if size < nimMinHeapPages * PageSize:
  606. result = requestOsChunks(a, nimMinHeapPages * PageSize)
  607. splitChunk(a, result, size)
  608. else:
  609. result = requestOsChunks(a, size)
  610. # if we over-allocated, split the chunk:
  611. if result.size > size:
  612. splitChunk(a, result, size)
  613. result.owner = addr a
  614. else:
  615. removeChunkFromMatrix2(a, result, fl, sl)
  616. if result.size >= size + PageSize:
  617. splitChunk(a, result, size)
  618. # set 'used' to true:
  619. result.prevSize = 1
  620. track("setUsedToFalse", addr result.size, sizeof(int))
  621. sysAssert result.owner == addr a, "getBigChunk: No owner set!"
  622. incl(a, a.chunkStarts, pageIndex(result))
  623. dec(a.freeMem, size)
  624. when RegionHasLock:
  625. releaseSys a.lock
  626. proc getHugeChunk(a: var MemRegion; size: int): PBigChunk =
  627. result = cast[PBigChunk](allocPages(a, size))
  628. when RegionHasLock:
  629. if not a.lockActive:
  630. a.lockActive = true
  631. initSysLock(a.lock)
  632. acquireSys a.lock
  633. incCurrMem(a, size)
  634. # XXX add this to the heap links. But also remove it from them later.
  635. when false: a.addHeapLink(result, size)
  636. sysAssert((cast[int](result) and PageMask) == 0, "getHugeChunk")
  637. result.next = nil
  638. result.prev = nil
  639. result.size = size
  640. # set 'used' to true:
  641. result.prevSize = 1
  642. result.owner = addr a
  643. incl(a, a.chunkStarts, pageIndex(result))
  644. when RegionHasLock:
  645. releaseSys a.lock
  646. proc freeHugeChunk(a: var MemRegion; c: PBigChunk) =
  647. let size = c.size
  648. sysAssert(size >= HugeChunkSize, "freeHugeChunk: invalid size")
  649. excl(a.chunkStarts, pageIndex(c))
  650. decCurrMem(a, size)
  651. osDeallocPages(c, size)
  652. proc getSmallChunk(a: var MemRegion): PSmallChunk =
  653. var res = getBigChunk(a, PageSize)
  654. sysAssert res.prev == nil, "getSmallChunk 1"
  655. sysAssert res.next == nil, "getSmallChunk 2"
  656. result = cast[PSmallChunk](res)
  657. # -----------------------------------------------------------------------------
  658. when not defined(gcDestructors):
  659. proc isAllocatedPtr(a: MemRegion, p: pointer): bool {.benign.}
  660. when true:
  661. template allocInv(a: MemRegion): bool = true
  662. else:
  663. proc allocInv(a: MemRegion): bool =
  664. ## checks some (not all yet) invariants of the allocator's data structures.
  665. for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
  666. var c = a.freeSmallChunks[s]
  667. while not (c == nil):
  668. if c.next == c:
  669. echo "[SYSASSERT] c.next == c"
  670. return false
  671. if not (c.size == s * MemAlign):
  672. echo "[SYSASSERT] c.size != s * MemAlign"
  673. return false
  674. var it = c.freeList
  675. while not (it == nil):
  676. if not (it.zeroField == 0):
  677. echo "[SYSASSERT] it.zeroField != 0"
  678. c_printf("%ld %p\n", it.zeroField, it)
  679. return false
  680. it = it.next
  681. c = c.next
  682. result = true
  683. when false:
  684. var
  685. rsizes: array[50_000, int]
  686. rsizesLen: int
  687. proc trackSize(size: int) =
  688. rsizes[rsizesLen] = size
  689. inc rsizesLen
  690. proc untrackSize(size: int) =
  691. for i in 0 .. rsizesLen-1:
  692. if rsizes[i] == size:
  693. rsizes[i] = rsizes[rsizesLen-1]
  694. dec rsizesLen
  695. return
  696. c_fprintf(stdout, "%ld\n", size)
  697. sysAssert(false, "untracked size!")
  698. else:
  699. template trackSize(x) = discard
  700. template untrackSize(x) = discard
  701. proc deallocBigChunk(a: var MemRegion, c: PBigChunk) =
  702. when RegionHasLock:
  703. acquireSys a.lock
  704. dec a.occ, c.size
  705. untrackSize(c.size)
  706. sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case B)"
  707. when not defined(gcDestructors):
  708. a.deleted = getBottom(a)
  709. del(a, a.root, cast[int](addr(c.data)))
  710. if c.size >= HugeChunkSize: freeHugeChunk(a, c)
  711. else: freeBigChunk(a, c)
  712. when RegionHasLock:
  713. releaseSys a.lock
  714. when defined(gcDestructors):
  715. template atomicPrepend(head, elem: untyped) =
  716. # see also https://en.cppreference.com/w/cpp/atomic/atomic_compare_exchange
  717. when hasThreadSupport:
  718. while true:
  719. elem.next.storea head.loada
  720. if atomicCompareExchangeN(addr head, addr elem.next, elem, weak = true, ATOMIC_RELEASE, ATOMIC_RELAXED):
  721. break
  722. else:
  723. elem.next.storea head.loada
  724. head.storea elem
  725. proc addToSharedFreeListBigChunks(a: var MemRegion; c: PBigChunk) {.inline.} =
  726. sysAssert c.next == nil, "c.next pointer must be nil"
  727. atomicPrepend a.sharedFreeListBigChunks, c
  728. proc addToSharedFreeList(c: PSmallChunk; f: ptr FreeCell; size: int) {.inline.} =
  729. atomicPrepend c.owner.sharedFreeLists[size], f
  730. const MaxSteps = 20
  731. proc compensateCounters(a: var MemRegion; c: PSmallChunk; size: int) =
  732. # rawDealloc did NOT do the usual:
  733. # `inc(c.free, size); dec(a.occ, size)` because it wasn't the owner of these
  734. # memory locations. We have to compensate for that here, over the entire list.
  735. var it = c.freeList
  736. var total = 0
  737. while it != nil:
  738. inc total, size
  739. let chunk = cast[PSmallChunk](pageAddr(it))
  740. if c != chunk:
  741. # The cell is foreign, potentially even from a foreign thread.
  742. # It must block the current chunk from being freed, as doing so would leak memory.
  743. inc c.foreignCells
  744. it = it.next
  745. # By not adjusting the foreign chunk we reserve space in it to prevent deallocation
  746. inc(c.free, total)
  747. dec(a.occ, total)
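# For example: if fetchSharedCells stole a list of 3 cells for the 32-byte size
# class, `total` ends up as 96, so c.free grows by 96 and a.occ shrinks by 96;
# every cell whose pageAddr is not `c` additionally bumps c.foreignCells, which
# keeps `c` from being returned to tlsf while it still holds cells that belong
# to other chunks.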
  748. proc freeDeferredObjects(a: var MemRegion; root: PBigChunk) =
  749. var it = root
  750. var maxIters = MaxSteps # make it time-bounded
  751. while true:
  752. let rest = it.next.loada
  753. it.next.storea nil
  754. deallocBigChunk(a, cast[PBigChunk](it))
  755. if maxIters == 0:
  756. if rest != nil:
  757. addToSharedFreeListBigChunks(a, rest)
  758. sysAssert a.sharedFreeListBigChunks != nil, "re-enqueing failed"
  759. break
  760. it = rest
  761. dec maxIters
  762. if it == nil: break
  763. proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
  764. when defined(nimTypeNames):
  765. inc(a.allocCounter)
  766. sysAssert(allocInv(a), "rawAlloc: begin")
  767. sysAssert(roundup(65, 8) == 72, "rawAlloc: roundup broken")
  768. var size = roundup(requestedSize, MemAlign)
  769. sysAssert(size >= sizeof(FreeCell), "rawAlloc: requested size too small")
  770. sysAssert(size >= requestedSize, "insufficient allocated size!")
  771. #c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
  772. if size <= SmallChunkSize-smallChunkOverhead():
  773. template fetchSharedCells(tc: PSmallChunk) =
  774. # Consumes cells from (potentially) foreign threads from `a.sharedFreeLists[s]`
  775. when defined(gcDestructors):
  776. if tc.freeList == nil:
  777. when hasThreadSupport:
  778. # Steal the entire list from `sharedFreeList`:
  779. tc.freeList = atomicExchangeN(addr a.sharedFreeLists[s], nil, ATOMIC_RELAXED)
  780. else:
  781. tc.freeList = a.sharedFreeLists[s]
  782. a.sharedFreeLists[s] = nil
  783. # if `tc.freeList` isn't nil, `tc` will gain capacity.
  784. # We must calculate how much it gained and how many foreign cells are included.
  785. compensateCounters(a, tc, size)
  786. # allocate a small block: for small chunks, we use only its next pointer
  787. let s = size div MemAlign
  788. var c = a.freeSmallChunks[s]
  789. if c == nil:
  790. # There is no free chunk of the requested size available, we need a new one.
  791. c = getSmallChunk(a)
  792. # init all fields in case memory didn't get zeroed
  793. c.freeList = nil
  794. c.foreignCells = 0
  795. sysAssert c.size == PageSize, "rawAlloc 3"
  796. c.size = size
  797. c.acc = size.uint32
  798. c.free = SmallChunkSize - smallChunkOverhead() - size.int32
  799. sysAssert c.owner == addr(a), "rawAlloc: No owner set!"
  800. c.next = nil
  801. c.prev = nil
  802. # Shared cells are fetched here in case `c.size * 2 >= SmallChunkSize - smallChunkOverhead()`.
  803. # For those single cell chunks, we would otherwise have to allocate a new one almost every time.
  804. fetchSharedCells(c)
  805. if c.free >= size:
  806. # Because removals from `a.freeSmallChunks[s]` only happen in the other alloc branch and during dealloc,
  807. # we must not add it to the list if it cannot be used the next time a pointer of `size` bytes is needed.
  808. listAdd(a.freeSmallChunks[s], c)
  809. result = addr(c.data)
  810. sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 4")
  811. else:
  812. # There is a free chunk of the requested size available, use it.
  813. sysAssert(allocInv(a), "rawAlloc: begin c != nil")
  814. sysAssert c.next != c, "rawAlloc 5"
  815. #if c.size != size:
  816. # c_fprintf(stdout, "csize: %lld; size %lld\n", c.size, size)
  817. sysAssert c.size == size, "rawAlloc 6"
  818. if c.freeList == nil:
  819. sysAssert(c.acc.int + smallChunkOverhead() + size <= SmallChunkSize,
  820. "rawAlloc 7")
  821. result = cast[pointer](cast[int](addr(c.data)) +% c.acc.int)
  822. inc(c.acc, size)
  823. else:
  824. # There are free cells available, prefer them over the accumulator
  825. result = c.freeList
  826. when not defined(gcDestructors):
  827. sysAssert(c.freeList.zeroField == 0, "rawAlloc 8")
  828. c.freeList = c.freeList.next
  829. if cast[PSmallChunk](pageAddr(result)) != c:
  830. # This cell isn't a blocker for the current chunk's deallocation anymore
  831. dec(c.foreignCells)
  832. else:
  833. sysAssert(c == cast[PSmallChunk](pageAddr(result)), "rawAlloc: Bad cell")
  834. # Even if the cell we return is foreign, the local chunk's capacity decreases.
  835. # The capacity was previously reserved in the source chunk (when it first got allocated),
  836. # then added into the current chunk during dealloc,
  837. # so the source chunk will not be freed or leak memory because of this.
  838. dec(c.free, size)
  839. sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 9")
  840. sysAssert(allocInv(a), "rawAlloc: end c != nil")
  841. # We fetch deferred cells *after* advancing `c.freeList`/`acc` to adjust `c.free`.
  842. # If after the adjustment it turns out there's free cells available,
  843. # the chunk stays in `a.freeSmallChunks[s]` and the need for a new chunk is delayed.
  844. fetchSharedCells(c)
  845. sysAssert(allocInv(a), "rawAlloc: before c.free < size")
  846. if c.free < size:
  847. # Even after fetching shared cells the chunk has no usable memory left. It is no longer the active chunk
  848. sysAssert(allocInv(a), "rawAlloc: before listRemove test")
  849. listRemove(a.freeSmallChunks[s], c)
  850. sysAssert(allocInv(a), "rawAlloc: end listRemove test")
  851. sysAssert(((cast[int](result) and PageMask) - smallChunkOverhead()) %%
  852. size == 0, "rawAlloc 21")
  853. sysAssert(allocInv(a), "rawAlloc: end small size")
  854. inc a.occ, size
  855. trackSize(c.size)
  856. else:
  857. when defined(gcDestructors):
  858. when hasThreadSupport:
  859. let deferredFrees = atomicExchangeN(addr a.sharedFreeListBigChunks, nil, ATOMIC_RELAXED)
  860. else:
  861. let deferredFrees = a.sharedFreeListBigChunks
  862. a.sharedFreeListBigChunks = nil
  863. if deferredFrees != nil:
  864. freeDeferredObjects(a, deferredFrees)
  865. size = requestedSize + bigChunkOverhead() # roundup(requestedSize+bigChunkOverhead(), PageSize)
  866. # allocate a large block
  867. var c = if size >= HugeChunkSize: getHugeChunk(a, size)
  868. else: getBigChunk(a, size)
  869. sysAssert c.prev == nil, "rawAlloc 10"
  870. sysAssert c.next == nil, "rawAlloc 11"
  871. result = addr(c.data)
  872. sysAssert((cast[int](c) and (MemAlign-1)) == 0, "rawAlloc 13")
  873. sysAssert((cast[int](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
  874. when not defined(gcDestructors):
  875. if a.root == nil: a.root = getBottom(a)
  876. add(a, a.root, cast[int](result), cast[int](result)+%size)
  877. inc a.occ, c.size
  878. trackSize(c.size)
  879. sysAssert(isAccessible(a, result), "rawAlloc 14")
  880. sysAssert(allocInv(a), "rawAlloc: end")
  881. when logAlloc: cprintf("var pointer_%p = alloc(%ld) # %p\n", result, requestedSize, addr a)
  882. proc rawAlloc0(a: var MemRegion, requestedSize: int): pointer =
  883. result = rawAlloc(a, requestedSize)
  884. zeroMem(result, requestedSize)
  885. proc rawDealloc(a: var MemRegion, p: pointer) =
  886. when defined(nimTypeNames):
  887. inc(a.deallocCounter)
  888. #sysAssert(isAllocatedPtr(a, p), "rawDealloc: no allocated pointer")
  889. sysAssert(allocInv(a), "rawDealloc: begin")
  890. var c = pageAddr(p)
  891. sysAssert(c != nil, "rawDealloc: begin")
  892. if isSmallChunk(c):
  893. # `p` is within a small chunk:
  894. var c = cast[PSmallChunk](c)
  895. let s = c.size
  896. # ^ We might access thread foreign storage here.
  897. # The other thread cannot possibly free this block as it's still alive.
  898. var f = cast[ptr FreeCell](p)
  899. if c.owner == addr(a):
  900. # We own the block, there is no foreign thread involved.
  901. dec a.occ, s
  902. untrackSize(s)
  903. sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
  904. sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
  905. s == 0, "rawDealloc 3")
  906. when not defined(gcDestructors):
  907. #echo("setting to nil: ", $cast[int](addr(f.zeroField)))
  908. sysAssert(f.zeroField != 0, "rawDealloc 1")
  909. f.zeroField = 0
  910. when overwriteFree:
  911. # set to 0xff to check for usage after free bugs:
  912. nimSetMem(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
  913. s -% sizeof(FreeCell))
  914. let activeChunk = a.freeSmallChunks[s div MemAlign]
  915. if activeChunk != nil and c != activeChunk:
  916. # This pointer is not part of the active chunk: lend it out
  917. # and do not adjust the current chunk (same logic as compensateCounters).
  918. # Putting the cell into the active chunk
  919. # may prevent a queue of available chunks from forming in a.freeSmallChunks[s div MemAlign].
  920. # This queue would otherwise waste memory in the form of free cells until we return to those chunks.
  921. f.next = activeChunk.freeList
  922. activeChunk.freeList = f # lend the cell
  923. inc(activeChunk.free, s) # By not adjusting the current chunk's capacity it is prevented from being freed
  924. inc(activeChunk.foreignCells) # The cell is now considered foreign from the perspective of the active chunk
  925. else:
  926. f.next = c.freeList
  927. c.freeList = f
  928. if c.free < s:
  929. # The chunk could not have been active as it didn't have enough space to give
  930. listAdd(a.freeSmallChunks[s div MemAlign], c)
  931. inc(c.free, s)
  932. else:
  933. inc(c.free, s)
  934. # Free only if the entire chunk is unused and there are no borrowed cells.
  935. # If the chunk were to be freed while it references foreign cells,
  936. # the foreign chunks will leak memory and can never be freed.
  937. if c.free == SmallChunkSize-smallChunkOverhead() and c.foreignCells == 0:
  938. listRemove(a.freeSmallChunks[s div MemAlign], c)
  939. c.size = SmallChunkSize
  940. freeBigChunk(a, cast[PBigChunk](c))
  941. else:
  942. when logAlloc: cprintf("dealloc(pointer_%p) # SMALL FROM %p CALLER %p\n", p, c.owner, addr(a))
  943. when defined(gcDestructors):
  944. addToSharedFreeList(c, f, s div MemAlign)
  945. sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
  946. s == 0, "rawDealloc 2")
  947. else:
  948. # set to 0xff to check for usage after free bugs:
  949. when overwriteFree: nimSetMem(p, -1'i32, c.size -% bigChunkOverhead())
  950. when logAlloc: cprintf("dealloc(pointer_%p) # BIG %p\n", p, c.owner)
  951. when defined(gcDestructors):
  952. if c.owner == addr(a):
  953. deallocBigChunk(a, cast[PBigChunk](c))
  954. else:
  955. addToSharedFreeListBigChunks(c.owner[], cast[PBigChunk](c))
  956. else:
  957. deallocBigChunk(a, cast[PBigChunk](c))
  958. sysAssert(allocInv(a), "rawDealloc: end")
  959. #when logAlloc: cprintf("dealloc(pointer_%p)\n", p)
  960. when not defined(gcDestructors):
  961. proc isAllocatedPtr(a: MemRegion, p: pointer): bool =
  962. if isAccessible(a, p):
  963. var c = pageAddr(p)
  964. if not chunkUnused(c):
  965. if isSmallChunk(c):
  966. var c = cast[PSmallChunk](c)
  967. var offset = (cast[int](p) and (PageSize-1)) -%
  968. smallChunkOverhead()
  969. result = (c.acc.int >% offset) and (offset %% c.size == 0) and
  970. (cast[ptr FreeCell](p).zeroField >% 1)
  971. else:
  972. var c = cast[PBigChunk](c)
  973. result = p == addr(c.data) and cast[ptr FreeCell](p).zeroField >% 1
  974. proc prepareForInteriorPointerChecking(a: var MemRegion) {.inline.} =
  975. a.minLargeObj = lowGauge(a.root)
  976. a.maxLargeObj = highGauge(a.root)
  977. proc interiorAllocatedPtr(a: MemRegion, p: pointer): pointer =
  978. if isAccessible(a, p):
  979. var c = pageAddr(p)
  980. if not chunkUnused(c):
  981. if isSmallChunk(c):
  982. var c = cast[PSmallChunk](c)
  983. var offset = (cast[int](p) and (PageSize-1)) -%
  984. smallChunkOverhead()
  985. if c.acc.int >% offset:
  986. sysAssert(cast[int](addr(c.data)) +% offset ==
  987. cast[int](p), "offset is not what you think it is")
  988. var d = cast[ptr FreeCell](cast[int](addr(c.data)) +%
  989. offset -% (offset %% c.size))
  990. if d.zeroField >% 1:
  991. result = d
  992. sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
  993. else:
  994. var c = cast[PBigChunk](c)
  995. var d = addr(c.data)
  996. if p >= d and cast[ptr FreeCell](d).zeroField >% 1:
  997. result = d
  998. sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
  999. else:
  1000. var q = cast[int](p)
  1001. if q >=% a.minLargeObj and q <=% a.maxLargeObj:
  1002. # this check is highly effective! The test fails for 99.96% of all checks on
  1003. # an x86-64.
  1004. var avlNode = inRange(a.root, q)
  1005. if avlNode != nil:
  1006. var k = cast[pointer](avlNode.key)
  1007. var c = cast[PBigChunk](pageAddr(k))
  1008. sysAssert(addr(c.data) == k, " k is not the same as addr(c.data)!")
  1009. if cast[ptr FreeCell](k).zeroField >% 1:
  1010. result = k
  1011. sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
  1012. proc ptrSize(p: pointer): int =
  1013. when not defined(gcDestructors):
  1014. var x = cast[pointer](cast[int](p) -% sizeof(FreeCell))
  1015. var c = pageAddr(p)
  1016. sysAssert(not chunkUnused(c), "ptrSize")
  1017. result = c.size -% sizeof(FreeCell)
  1018. if not isSmallChunk(c):
  1019. dec result, bigChunkOverhead()
  1020. else:
  1021. var c = pageAddr(p)
  1022. sysAssert(not chunkUnused(c), "ptrSize")
  1023. result = c.size
  1024. if not isSmallChunk(c):
  1025. dec result, bigChunkOverhead()
  1026. proc alloc(allocator: var MemRegion, size: Natural): pointer {.gcsafe.} =
  1027. when not defined(gcDestructors):
  1028. result = rawAlloc(allocator, size+sizeof(FreeCell))
  1029. cast[ptr FreeCell](result).zeroField = 1 # mark it as used
  1030. sysAssert(not isAllocatedPtr(allocator, result), "alloc")
  1031. result = cast[pointer](cast[int](result) +% sizeof(FreeCell))
  1032. track("alloc", result, size)
  1033. else:
  1034. result = rawAlloc(allocator, size)
  1035. proc alloc0(allocator: var MemRegion, size: Natural): pointer =
  1036. result = alloc(allocator, size)
  1037. zeroMem(result, size)
  1038. proc dealloc(allocator: var MemRegion, p: pointer) =
  1039. when not defined(gcDestructors):
  1040. sysAssert(p != nil, "dealloc: p is nil")
  1041. var x = cast[pointer](cast[int](p) -% sizeof(FreeCell))
  1042. sysAssert(x != nil, "dealloc: x is nil")
  1043. sysAssert(isAccessible(allocator, x), "is not accessible")
  1044. sysAssert(cast[ptr FreeCell](x).zeroField == 1, "dealloc: object header corrupted")
  1045. rawDealloc(allocator, x)
  1046. sysAssert(not isAllocatedPtr(allocator, x), "dealloc: object still accessible")
  1047. track("dealloc", p, 0)
  1048. else:
  1049. rawDealloc(allocator, p)
  1050. proc realloc(allocator: var MemRegion, p: pointer, newsize: Natural): pointer =
  1051. if newsize > 0:
  1052. result = alloc(allocator, newsize)
  1053. if p != nil:
  1054. copyMem(result, p, min(ptrSize(p), newsize))
  1055. dealloc(allocator, p)
  1056. elif p != nil:
  1057. dealloc(allocator, p)
  1058. proc realloc0(allocator: var MemRegion, p: pointer, oldsize, newsize: Natural): pointer =
  1059. result = realloc(allocator, p, newsize)
  1060. if newsize > oldsize:
  1061. zeroMem(cast[pointer](cast[uint](result) + uint(oldsize)), newsize - oldsize)
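# A small usage sketch of the region-level API defined above (illustrative only;
# ordinary Nim code goes through the shims generated by instantiateForRegion
# further below rather than touching a MemRegion directly):
#
#   var r: MemRegion
#   let p = alloc0(r, 128)        # zeroed block, header handled internally
#   let q = realloc(r, p, 256)    # grows the block, copying min(ptrSize(p), 256) bytes
#   dealloc(r, q)
#   deallocOsPages(r)             # hand every page of the region back to the OS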
  1062. proc deallocOsPages(a: var MemRegion) =
  1063. # we free every 'ordinarily' allocated page by iterating over the page bits:
  1064. var it = addr(a.heapLinks)
  1065. while true:
  1066. let next = it.next
  1067. for i in 0..it.len-1:
  1068. let (p, size) = it.chunks[i]
  1069. when defined(debugHeapLinks):
  1070. cprintf("owner %p; dealloc A: %p size: %ld; next: %p\n", addr(a),
  1071. it, size, next)
  1072. sysAssert size >= PageSize, "origSize too small"
  1073. osDeallocPages(p, size)
  1074. it = next
  1075. if it == nil: break
  1076. # And then we free the pages that are in use for the page bits:
  1077. llDeallocAll(a)
  1078. proc getFreeMem(a: MemRegion): int {.inline.} = result = a.freeMem
  1079. proc getTotalMem(a: MemRegion): int {.inline.} = result = a.currMem
  1080. proc getOccupiedMem(a: MemRegion): int {.inline.} =
  1081. result = a.occ
  1082. # a.currMem - a.freeMem
  1083. when defined(nimTypeNames):
  1084. proc getMemCounters(a: MemRegion): (int, int) {.inline.} =
  1085. (a.allocCounter, a.deallocCounter)
  1086. # ---------------------- thread memory region -------------------------------
  1087. template instantiateForRegion(allocator: untyped) {.dirty.} =
  1088. {.push stackTrace: off.}
  1089. when defined(nimFulldebug):
  1090. proc interiorAllocatedPtr*(p: pointer): pointer =
  1091. result = interiorAllocatedPtr(allocator, p)
  1092. proc isAllocatedPtr*(p: pointer): bool =
  1093. let p = cast[pointer](cast[int](p)-%ByteAddress(sizeof(Cell)))
  1094. result = isAllocatedPtr(allocator, p)
  1095. proc deallocOsPages = deallocOsPages(allocator)
  1096. proc allocImpl(size: Natural): pointer =
  1097. result = alloc(allocator, size)
  1098. proc alloc0Impl(size: Natural): pointer =
  1099. result = alloc0(allocator, size)
  1100. proc deallocImpl(p: pointer) =
  1101. dealloc(allocator, p)
  1102. proc reallocImpl(p: pointer, newSize: Natural): pointer =
  1103. result = realloc(allocator, p, newSize)
  1104. proc realloc0Impl(p: pointer, oldSize, newSize: Natural): pointer =
  1105. result = realloc(allocator, p, newSize)
  1106. if newSize > oldSize:
  1107. zeroMem(cast[pointer](cast[uint](result) + uint(oldSize)), newSize - oldSize)
  1108. when false:
  1109. proc countFreeMem(): int =
  1110. # only used for assertions
  1111. var it = allocator.freeChunksList
  1112. while it != nil:
  1113. inc(result, it.size)
  1114. it = it.next
  1115. when hasThreadSupport and not defined(gcDestructors):
  1116. proc addSysExitProc(quitProc: proc() {.noconv.}) {.importc: "atexit", header: "<stdlib.h>".}
  1117. var sharedHeap: MemRegion
  1118. var heapLock: SysLock
  1119. initSysLock(heapLock)
  1120. addSysExitProc(proc() {.noconv.} = deinitSys(heapLock))
  1121. proc getFreeMem(): int =
  1122. #sysAssert(result == countFreeMem())
  1123. result = allocator.freeMem
  1124. proc getTotalMem(): int =
  1125. result = allocator.currMem
  1126. proc getOccupiedMem(): int =
  1127. result = allocator.occ #getTotalMem() - getFreeMem()
  1128. proc getMaxMem*(): int =
  1129. result = getMaxMem(allocator)
  1130. when defined(nimTypeNames):
  1131. proc getMemCounters*(): (int, int) = getMemCounters(allocator)
  1132. # -------------------- shared heap region ----------------------------------
  1133. proc allocSharedImpl(size: Natural): pointer =
  1134. when hasThreadSupport and not defined(gcDestructors):
  1135. acquireSys(heapLock)
  1136. result = alloc(sharedHeap, size)
  1137. releaseSys(heapLock)
  1138. else:
  1139. result = allocImpl(size)
  1140. proc allocShared0Impl(size: Natural): pointer =
  1141. result = allocSharedImpl(size)
  1142. zeroMem(result, size)
  1143. proc deallocSharedImpl(p: pointer) =
  1144. when hasThreadSupport and not defined(gcDestructors):
  1145. acquireSys(heapLock)
  1146. dealloc(sharedHeap, p)
  1147. releaseSys(heapLock)
  1148. else:
  1149. deallocImpl(p)
  1150. proc reallocSharedImpl(p: pointer, newSize: Natural): pointer =
  1151. when hasThreadSupport and not defined(gcDestructors):
  1152. acquireSys(heapLock)
  1153. result = realloc(sharedHeap, p, newSize)
  1154. releaseSys(heapLock)
  1155. else:
  1156. result = reallocImpl(p, newSize)
  1157. proc reallocShared0Impl(p: pointer, oldSize, newSize: Natural): pointer =
  1158. when hasThreadSupport and not defined(gcDestructors):
  1159. acquireSys(heapLock)
  1160. result = realloc0(sharedHeap, p, oldSize, newSize)
  1161. releaseSys(heapLock)
  1162. else:
  1163. result = realloc0Impl(p, oldSize, newSize)
  1164. when hasThreadSupport:
  1165. when defined(gcDestructors):
  1166. proc getFreeSharedMem(): int =
  1167. allocator.freeMem
  1168. proc getTotalSharedMem(): int =
  1169. allocator.currMem
  1170. proc getOccupiedSharedMem(): int =
  1171. allocator.occ
  1172. else:
  1173. template sharedMemStatsShared(v: int) =
  1174. acquireSys(heapLock)
  1175. result = v
  1176. releaseSys(heapLock)
  1177. proc getFreeSharedMem(): int =
  1178. sharedMemStatsShared(sharedHeap.freeMem)
  1179. proc getTotalSharedMem(): int =
  1180. sharedMemStatsShared(sharedHeap.currMem)
  1181. proc getOccupiedSharedMem(): int =
  1182. sharedMemStatsShared(sharedHeap.occ)
  1183. #sharedMemStatsShared(sharedHeap.currMem - sharedHeap.freeMem)
  1184. {.pop.}
  1185. {.pop.}