//===-- sanitizer_allocator.h -----------------------------------*- C++ -*-===//
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
//
//===----------------------------------------------------------------------===//
#ifndef SANITIZER_ALLOCATOR_H
#define SANITIZER_ALLOCATOR_H

#include "sanitizer_internal_defs.h"
#include "sanitizer_common.h"
#include "sanitizer_libc.h"
#include "sanitizer_list.h"
#include "sanitizer_mutex.h"
#include "sanitizer_lfstack.h"

namespace __sanitizer {

// Depending on allocator_may_return_null either return 0 or crash.
void *AllocatorReturnNull();

// SizeClassMap maps allocation sizes into size classes and back.
// Class 0 corresponds to size 0.
// Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16).
// Next 4 classes: 256 + i * 64 (i = 1 to 4).
// Next 4 classes: 512 + i * 128 (i = 1 to 4).
// ...
// Next 4 classes: 2^k + i * 2^(k-2) (i = 1 to 4).
// Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
//
// This structure of the size class map gives us:
// - Efficient table-free class-to-size and size-to-class functions.
// - Difference between two consecutive size classes is between 14% and 25%.
//
// This class also gives a hint to a thread-caching allocator about the number
// of chunks that need to be cached per-thread:
// - kMaxNumCached is the maximal number of chunks per size class.
// - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
//
// Part of output of SizeClassMap::Print():
// c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
// c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1
// c02 => s: 32 diff: +16 100% l 5 cached: 256 8192; id 2
// c03 => s: 48 diff: +16 50% l 5 cached: 256 12288; id 3
// c04 => s: 64 diff: +16 33% l 6 cached: 256 16384; id 4
// c05 => s: 80 diff: +16 25% l 6 cached: 256 20480; id 5
// c06 => s: 96 diff: +16 20% l 6 cached: 256 24576; id 6
// c07 => s: 112 diff: +16 16% l 6 cached: 256 28672; id 7
//
// c08 => s: 128 diff: +16 14% l 7 cached: 256 32768; id 8
// c09 => s: 144 diff: +16 12% l 7 cached: 256 36864; id 9
// c10 => s: 160 diff: +16 11% l 7 cached: 256 40960; id 10
// c11 => s: 176 diff: +16 10% l 7 cached: 256 45056; id 11
// c12 => s: 192 diff: +16 09% l 7 cached: 256 49152; id 12
// c13 => s: 208 diff: +16 08% l 7 cached: 256 53248; id 13
// c14 => s: 224 diff: +16 07% l 7 cached: 256 57344; id 14
// c15 => s: 240 diff: +16 07% l 7 cached: 256 61440; id 15
//
// c16 => s: 256 diff: +16 06% l 8 cached: 256 65536; id 16
// c17 => s: 320 diff: +64 25% l 8 cached: 204 65280; id 17
// c18 => s: 384 diff: +64 20% l 8 cached: 170 65280; id 18
// c19 => s: 448 diff: +64 16% l 8 cached: 146 65408; id 19
//
// c20 => s: 512 diff: +64 14% l 9 cached: 128 65536; id 20
// c21 => s: 640 diff: +128 25% l 9 cached: 102 65280; id 21
// c22 => s: 768 diff: +128 20% l 9 cached: 85 65280; id 22
// c23 => s: 896 diff: +128 16% l 9 cached: 73 65408; id 23
//
// c24 => s: 1024 diff: +128 14% l 10 cached: 64 65536; id 24
// c25 => s: 1280 diff: +256 25% l 10 cached: 51 65280; id 25
// c26 => s: 1536 diff: +256 20% l 10 cached: 42 64512; id 26
// c27 => s: 1792 diff: +256 16% l 10 cached: 36 64512; id 27
//
// ...
//
// c48 => s: 65536 diff: +8192 14% l 16 cached: 1 65536; id 48
// c49 => s: 81920 diff: +16384 25% l 16 cached: 1 81920; id 49
// c50 => s: 98304 diff: +16384 20% l 16 cached: 1 98304; id 50
// c51 => s: 114688 diff: +16384 16% l 16 cached: 1 114688; id 51
//
// c52 => s: 131072 diff: +16384 14% l 17 cached: 1 131072; id 52
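//
// For example, a request of 300 bytes has its most significant bit at 2^8,
// so it falls into the "256 + i * 64" row and is rounded up to class 17
// (size 320); a request of 48 bytes maps to class (48 + 15) / 16 = 3
// (size 48). See ClassID() and Size() below for the exact computation.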
template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog>
class SizeClassMap {
  static const uptr kMinSizeLog = 4;
  static const uptr kMidSizeLog = kMinSizeLog + 4;
  static const uptr kMinSize = 1 << kMinSizeLog;
  static const uptr kMidSize = 1 << kMidSizeLog;
  static const uptr kMidClass = kMidSize / kMinSize;
  static const uptr S = 2;
  static const uptr M = (1 << S) - 1;

 public:
  static const uptr kMaxNumCached = kMaxNumCachedT;
  // We transfer chunks between central and thread-local free lists in batches.
  // For small size classes we allocate batches separately.
  // For large size classes we use one of the chunks to store the batch.
  struct TransferBatch {
    TransferBatch *next;
    uptr count;
    void *batch[kMaxNumCached];
  };

  static const uptr kMaxSize = 1UL << kMaxSizeLog;
  static const uptr kNumClasses =
      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
  static const uptr kNumClassesRounded =
      kNumClasses == 32 ? 32 :
      kNumClasses <= 64 ? 64 :
      kNumClasses <= 128 ? 128 : 256;

  static uptr Size(uptr class_id) {
    if (class_id <= kMidClass)
      return kMinSize * class_id;
    class_id -= kMidClass;
    uptr t = kMidSize << (class_id >> S);
    return t + (t >> S) * (class_id & M);
  }

  static uptr ClassID(uptr size) {
    if (size <= kMidSize)
      return (size + kMinSize - 1) >> kMinSizeLog;
    if (size > kMaxSize) return 0;
    uptr l = MostSignificantSetBitIndex(size);
    uptr hbits = (size >> (l - S)) & M;
    uptr lbits = size & ((1 << (l - S)) - 1);
    uptr l1 = l - kMidSizeLog;
    return kMidClass + (l1 << S) + hbits + (lbits > 0);
  }

  static uptr MaxCached(uptr class_id) {
    if (class_id == 0) return 0;
    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
    return Max<uptr>(1, Min(kMaxNumCached, n));
  }

  static void Print() {
    uptr prev_s = 0;
    uptr total_cached = 0;
    for (uptr i = 0; i < kNumClasses; i++) {
      uptr s = Size(i);
      if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
        Printf("\n");
      uptr d = s - prev_s;
      uptr p = prev_s ? (d * 100 / prev_s) : 0;
      uptr l = s ? MostSignificantSetBitIndex(s) : 0;
      uptr cached = MaxCached(i) * s;
      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
             "cached: %zd %zd; id %zd\n",
             i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
      total_cached += cached;
      prev_s = s;
    }
    Printf("Total cached: %zd\n", total_cached);
  }

  static bool SizeClassRequiresSeparateTransferBatch(uptr class_id) {
    return Size(class_id) < sizeof(TransferBatch) -
        sizeof(uptr) * (kMaxNumCached - MaxCached(class_id));
  }

  static void Validate() {
    for (uptr c = 1; c < kNumClasses; c++) {
      // Printf("Validate: c%zd\n", c);
      uptr s = Size(c);
      CHECK_NE(s, 0U);
      CHECK_EQ(ClassID(s), c);
      if (c != kNumClasses - 1)
        CHECK_EQ(ClassID(s + 1), c + 1);
      CHECK_EQ(ClassID(s - 1), c);
      if (c)
        CHECK_GT(Size(c), Size(c-1));
    }
    CHECK_EQ(ClassID(kMaxSize + 1), 0);
    for (uptr s = 1; s <= kMaxSize; s++) {
      uptr c = ClassID(s);
      // Printf("s%zd => c%zd\n", s, c);
      CHECK_LT(c, kNumClasses);
      CHECK_GE(Size(c), s);
      if (c > 0)
        CHECK_LT(Size(c-1), s);
    }
  }
};

typedef SizeClassMap<17, 128, 16> DefaultSizeClassMap;
typedef SizeClassMap<17, 64, 14> CompactSizeClassMap;
template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;

// Memory allocator statistics
enum AllocatorStat {
  AllocatorStatAllocated,
  AllocatorStatMapped,
  AllocatorStatCount
};

typedef uptr AllocatorStatCounters[AllocatorStatCount];

// Per-thread stats, live in per-thread cache.
class AllocatorStats {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
  }
  void Add(AllocatorStat i, uptr v) {
    v += atomic_load(&stats_[i], memory_order_relaxed);
    atomic_store(&stats_[i], v, memory_order_relaxed);
  }
  void Sub(AllocatorStat i, uptr v) {
    v = atomic_load(&stats_[i], memory_order_relaxed) - v;
    atomic_store(&stats_[i], v, memory_order_relaxed);
  }
  void Set(AllocatorStat i, uptr v) {
    atomic_store(&stats_[i], v, memory_order_relaxed);
  }
  uptr Get(AllocatorStat i) const {
    return atomic_load(&stats_[i], memory_order_relaxed);
  }

 private:
  friend class AllocatorGlobalStats;
  AllocatorStats *next_;
  AllocatorStats *prev_;
  atomic_uintptr_t stats_[AllocatorStatCount];
};

// Global stats, used for aggregation and querying.
class AllocatorGlobalStats : public AllocatorStats {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
    next_ = this;
    prev_ = this;
  }
  void Register(AllocatorStats *s) {
    SpinMutexLock l(&mu_);
    s->next_ = next_;
    s->prev_ = this;
    next_->prev_ = s;
    next_ = s;
  }
  void Unregister(AllocatorStats *s) {
    SpinMutexLock l(&mu_);
    s->prev_->next_ = s->next_;
    s->next_->prev_ = s->prev_;
    for (int i = 0; i < AllocatorStatCount; i++)
      Add(AllocatorStat(i), s->Get(AllocatorStat(i)));
  }
  void Get(AllocatorStatCounters s) const {
    internal_memset(s, 0, AllocatorStatCount * sizeof(uptr));
    SpinMutexLock l(&mu_);
    const AllocatorStats *stats = this;
    for (;;) {
      for (int i = 0; i < AllocatorStatCount; i++)
        s[i] += stats->Get(AllocatorStat(i));
      stats = stats->next_;
      if (stats == this)
        break;
    }
    // All stats must be non-negative.
    for (int i = 0; i < AllocatorStatCount; i++)
      s[i] = ((sptr)s[i]) >= 0 ? s[i] : 0;
  }

 private:
  mutable SpinMutex mu_;
};
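
// Typical usage, as a sketch (the variable names below are illustrative):
// each thread owns an AllocatorStats object registered with a single
// AllocatorGlobalStats instance, and a global snapshot is taken via
//   AllocatorStatCounters counters;
//   global_stats.Get(counters);  // counters[AllocatorStatAllocated], etc.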

// Allocators call these callbacks on mmap/munmap.
struct NoOpMapUnmapCallback {
  void OnMap(uptr p, uptr size) const { }
  void OnUnmap(uptr p, uptr size) const { }
};

// Callback type for iterating over chunks.
typedef void (*ForEachChunkCallback)(uptr chunk, void *arg);

// SizeClassAllocator64 -- allocator for 64-bit address space.
//
// Space: a portion of address space of kSpaceSize bytes starting at
// a fixed address (kSpaceBeg). Both constants are powers of two and
// kSpaceBeg is kSpaceSize-aligned.
// At the beginning the entire space is mprotect-ed, then small parts of it
// are mapped on demand.
//
// Region: a part of Space dedicated to a single size class.
// There are kNumClasses Regions of equal size.
//
// UserChunk: a piece of memory returned to user.
// MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
//
// A Region looks like this:
// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
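//
// For illustration only (the constants below are hypothetical and not taken
// from any particular tool), a 64-bit tool would typically instantiate the
// allocator as
//   typedef SizeClassAllocator64<0x400000000000ULL /*kSpaceBeg,  2^46*/,
//                                0x040000000000ULL /*kSpaceSize, 2^42*/,
//                                16 /*kMetadataSize*/,
//                                DefaultSizeClassMap> PrimaryAllocator;
// and reserve [kSpaceBeg, kSpaceBeg + kSpaceSize) up front in Init().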
template <const uptr kSpaceBeg, const uptr kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator64 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

  void Init() {
    CHECK_EQ(kSpaceBeg,
             reinterpret_cast<uptr>(Mprotect(kSpaceBeg, kSpaceSize)));
    MapWithCallback(kSpaceEnd, AdditionalSize());
  }
  void MapWithCallback(uptr beg, uptr size) {
    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
    MapUnmapCallback().OnMap(beg, size);
  }
  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }
  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
      alignment <= SizeClassMap::kMaxSize;
  }
  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
                                uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *region = GetRegionInfo(class_id);
    Batch *b = region->free_list.Pop();
    if (b == 0)
      b = PopulateFreeList(stat, c, class_id, region);
    region->n_allocated += b->count;
    return b;
  }
  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
    RegionInfo *region = GetRegionInfo(class_id);
    CHECK_GT(b->count, 0);
    region->free_list.Push(b);
    region->n_freed += b->count;
  }
  static bool PointerIsMine(const void *p) {
    return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;
  }
  static uptr GetSizeClass(const void *p) {
    return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
  }
  void *GetBlockBegin(const void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    if (!size) return 0;
    uptr chunk_idx = GetChunkIdx((uptr)p, size);
    uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
    uptr beg = chunk_idx * size;
    uptr next_beg = beg + size;
    if (class_id >= kNumClasses) return 0;
    RegionInfo *region = GetRegionInfo(class_id);
    if (region->mapped_user >= next_beg)
      return reinterpret_cast<void*>(reg_beg + beg);
    return 0;
  }
  static uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }
  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
  void *GetMetaData(const void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
    return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
                                   (1 + chunk_idx) * kMetadataSize);
  }
  uptr TotalMemoryUsed() {
    uptr res = 0;
    for (uptr i = 0; i < kNumClasses; i++)
      res += GetRegionInfo(i)->allocated_user;
    return res;
  }
  // Test-only.
  void TestOnlyUnmap() {
    UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
  }
  void PrintStats() {
    uptr total_mapped = 0;
    uptr n_allocated = 0;
    uptr n_freed = 0;
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      total_mapped += region->mapped_user;
      n_allocated += region->n_allocated;
      n_freed += region->n_freed;
    }
    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
           "remains %zd\n",
           total_mapped >> 20, n_allocated, n_allocated - n_freed);
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      if (region->mapped_user == 0) continue;
      Printf(" %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
             class_id,
             SizeClassMap::Size(class_id),
             region->mapped_user >> 10,
             region->n_allocated,
             region->n_allocated - region->n_freed);
    }
  }
  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    for (uptr i = 0; i < kNumClasses; i++) {
      GetRegionInfo(i)->mutex.Lock();
    }
  }
  void ForceUnlock() {
    for (int i = (int)kNumClasses - 1; i >= 0; i--) {
      GetRegionInfo(i)->mutex.Unlock();
    }
  }
  // Iterate over all existing chunks.
  // The allocator must be locked when calling this function.
  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      uptr chunk_size = SizeClassMap::Size(class_id);
      uptr region_beg = kSpaceBeg + class_id * kRegionSize;
      for (uptr chunk = region_beg;
           chunk < region_beg + region->allocated_user;
           chunk += chunk_size) {
        // Too slow: CHECK_EQ((void *)chunk, GetBlockBegin((void *)chunk));
        callback(chunk, arg);
      }
    }
  }
  static uptr AdditionalSize() {
    return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
                     GetPageSizeCached());
  }

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;
  static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;

 private:
  static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
  static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
  COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
  // kRegionSize must be >= 2^32.
  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
  // Populate the free list with at most this number of bytes at once
  // or with one element if its size is greater.
  static const uptr kPopulateSize = 1 << 14;
  // Call mmap for user memory with at least this size.
  static const uptr kUserMapSize = 1 << 16;
  // Call mmap for metadata memory with at least this size.
  static const uptr kMetaMapSize = 1 << 16;

  struct RegionInfo {
    BlockingMutex mutex;
    LFStack<Batch> free_list;
    uptr allocated_user;  // Bytes allocated for user memory.
    uptr allocated_meta;  // Bytes allocated for metadata.
    uptr mapped_user;  // Bytes mapped for user memory.
    uptr mapped_meta;  // Bytes mapped for metadata.
    uptr n_allocated, n_freed;  // Just stats.
  };
  COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);

  RegionInfo *GetRegionInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
    return &regions[class_id];
  }

  static uptr GetChunkIdx(uptr chunk, uptr size) {
    uptr offset = chunk % kRegionSize;
    // Here we divide by a non-constant. This is costly.
    // size always fits into 32-bits. If the offset fits too, use 32-bit div.
    if (offset >> (SANITIZER_WORDSIZE / 2))
      return offset / size;
    return (u32)offset / (u32)size;
  }

  NOINLINE Batch* PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                                   uptr class_id, RegionInfo *region) {
    BlockingMutexLock l(&region->mutex);
    Batch *b = region->free_list.Pop();
    if (b)
      return b;
    uptr size = SizeClassMap::Size(class_id);
    uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
    uptr beg_idx = region->allocated_user;
    uptr end_idx = beg_idx + count * size;
    uptr region_beg = kSpaceBeg + kRegionSize * class_id;
    if (end_idx + size > region->mapped_user) {
      // Do the mmap for the user memory.
      uptr map_size = kUserMapSize;
      while (end_idx + size > region->mapped_user + map_size)
        map_size += kUserMapSize;
      CHECK_GE(region->mapped_user + map_size, end_idx);
      MapWithCallback(region_beg + region->mapped_user, map_size);
      stat->Add(AllocatorStatMapped, map_size);
      region->mapped_user += map_size;
    }
    uptr total_count = (region->mapped_user - beg_idx - size)
        / size / count * count;
    region->allocated_meta += total_count * kMetadataSize;
    if (region->allocated_meta > region->mapped_meta) {
      uptr map_size = kMetaMapSize;
      while (region->allocated_meta > region->mapped_meta + map_size)
        map_size += kMetaMapSize;
      // Do the mmap for the metadata.
      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
      MapWithCallback(region_beg + kRegionSize -
                      region->mapped_meta - map_size, map_size);
      region->mapped_meta += map_size;
    }
    CHECK_LE(region->allocated_meta, region->mapped_meta);
    if (region->mapped_user + region->mapped_meta > kRegionSize) {
      Printf("%s: Out of memory. Dying. ", SanitizerToolName);
      Printf("The process has exhausted %zuMB for size class %zu.\n",
             kRegionSize / 1024 / 1024, size);
      Die();
    }
    for (;;) {
      if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
        b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
      else
        b = (Batch*)(region_beg + beg_idx);
      b->count = count;
      for (uptr i = 0; i < count; i++)
        b->batch[i] = (void*)(region_beg + beg_idx + i * size);
      region->allocated_user += count * size;
      CHECK_LE(region->allocated_user, region->mapped_user);
      beg_idx += count * size;
      if (beg_idx + count * size + size > region->mapped_user)
        break;
      CHECK_GT(b->count, 0);
      region->free_list.Push(b);
    }
    return b;
  }
};

// Maps integers in range [0, kSize) to u8 values.
template<u64 kSize>
class FlatByteMap {
 public:
  void TestOnlyInit() {
    internal_memset(map_, 0, sizeof(map_));
  }
  void set(uptr idx, u8 val) {
    CHECK_LT(idx, kSize);
    CHECK_EQ(0U, map_[idx]);
    map_[idx] = val;
  }
  u8 operator[] (uptr idx) {
    CHECK_LT(idx, kSize);
    // FIXME: CHECK may be too expensive here.
    return map_[idx];
  }

 private:
  u8 map_[kSize];
};

// TwoLevelByteMap maps integers in range [0, kSize1*kSize2) to u8 values.
// It is implemented as a two-dimensional array: array of kSize1 pointers
// to kSize2-byte arrays. The secondary arrays are mmaped on demand.
// Each value is initially zero and can be set to something else only once.
// Setting and getting values from multiple threads is safe w/o extra locking.
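// For example (the numbers are illustrative only), TwoLevelByteMap<128, 4096>
// covers 128 * 4096 indices but reserves only the 128-pointer first level up
// front; each 4096-byte second-level array is mmaped the first time set()
// touches its range, and readers observe it via an acquire load in Get().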
template <u64 kSize1, u64 kSize2, class MapUnmapCallback = NoOpMapUnmapCallback>
class TwoLevelByteMap {
 public:
  void TestOnlyInit() {
    internal_memset(map1_, 0, sizeof(map1_));
    mu_.Init();
  }
  void TestOnlyUnmap() {
    for (uptr i = 0; i < kSize1; i++) {
      u8 *p = Get(i);
      if (!p) continue;
      MapUnmapCallback().OnUnmap(reinterpret_cast<uptr>(p), kSize2);
      UnmapOrDie(p, kSize2);
    }
  }

  uptr size() const { return kSize1 * kSize2; }
  uptr size1() const { return kSize1; }
  uptr size2() const { return kSize2; }

  void set(uptr idx, u8 val) {
    CHECK_LT(idx, kSize1 * kSize2);
    u8 *map2 = GetOrCreate(idx / kSize2);
    CHECK_EQ(0U, map2[idx % kSize2]);
    map2[idx % kSize2] = val;
  }
  u8 operator[] (uptr idx) const {
    CHECK_LT(idx, kSize1 * kSize2);
    u8 *map2 = Get(idx / kSize2);
    if (!map2) return 0;
    return map2[idx % kSize2];
  }

 private:
  u8 *Get(uptr idx) const {
    CHECK_LT(idx, kSize1);
    return reinterpret_cast<u8 *>(
        atomic_load(&map1_[idx], memory_order_acquire));
  }
  u8 *GetOrCreate(uptr idx) {
    u8 *res = Get(idx);
    if (!res) {
      SpinMutexLock l(&mu_);
      if (!(res = Get(idx))) {
        res = (u8*)MmapOrDie(kSize2, "TwoLevelByteMap");
        MapUnmapCallback().OnMap(reinterpret_cast<uptr>(res), kSize2);
        atomic_store(&map1_[idx], reinterpret_cast<uptr>(res),
                     memory_order_release);
      }
    }
    return res;
  }

  atomic_uintptr_t map1_[kSize1];
  StaticSpinMutex mu_;
};

// SizeClassAllocator32 -- allocator for 32-bit address space.
// This allocator can theoretically be used on 64-bit arch, but there it is
// less efficient than SizeClassAllocator64.
//
// [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
// be returned by MmapOrDie().
//
// Region:
//   a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
// Since the regions are aligned by kRegionSize, there are exactly
// kNumPossibleRegions possible regions in the address space and so we keep
// a ByteMap possible_regions to store the size classes of each Region.
// 0 size class means the region is not used by the allocator.
//
// One Region is used to allocate chunks of a single size class.
// A Region looks like this:
// UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
//
// In order to avoid false sharing the objects of this class should be
// cache-line aligned.
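//
// For illustration only (all constants here are hypothetical): a 32-bit tool
// could instantiate this allocator over the whole 4GB address space with
// 1MB regions and a flat region map:
//   typedef FlatByteMap<4096> ByteMapForExample;  // 2^32 / 2^20 regions
//   typedef SizeClassAllocator32<0 /*kSpaceBeg*/, (1ULL << 32) /*kSpaceSize*/,
//                                16 /*kMetadataSize*/, CompactSizeClassMap,
//                                20 /*kRegionSizeLog*/,
//                                ByteMapForExample> PrimaryAllocator;
// Each allocated Region then occupies exactly one slot in possible_regions.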
template <const uptr kSpaceBeg, const u64 kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          const uptr kRegionSizeLog,
          class ByteMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator32 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, kRegionSizeLog, ByteMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

  void Init() {
    possible_regions.TestOnlyInit();
    internal_memset(size_class_info_array, 0, sizeof(size_class_info_array));
  }
  void *MapWithCallback(uptr size) {
    size = RoundUpTo(size, GetPageSizeCached());
    void *res = MmapOrDie(size, "SizeClassAllocator32");
    MapUnmapCallback().OnMap((uptr)res, size);
    return res;
  }
  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }
  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
      alignment <= SizeClassMap::kMaxSize;
  }
  void *GetMetaData(const void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    uptr n = offset / (u32)size;  // 32-bit division
    uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
    return reinterpret_cast<void*>(meta);
  }
  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
                                uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    if (sci->free_list.empty())
      PopulateFreeList(stat, c, sci, class_id);
    CHECK(!sci->free_list.empty());
    Batch *b = sci->free_list.front();
    sci->free_list.pop_front();
    return b;
  }
  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    CHECK_GT(b->count, 0);
    sci->free_list.push_front(b);
  }
  bool PointerIsMine(const void *p) {
    return GetSizeClass(p) != 0;
  }
  uptr GetSizeClass(const void *p) {
    return possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
  }
  void *GetBlockBegin(const void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    u32 n = offset / (u32)size;  // 32-bit division
    uptr res = beg + (n * (u32)size);
    return reinterpret_cast<void*>(res);
  }
  uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }
  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
  uptr TotalMemoryUsed() {
    // No need to lock here.
    uptr res = 0;
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (possible_regions[i])
        res += kRegionSize;
    return res;
  }
  void TestOnlyUnmap() {
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (possible_regions[i])
        UnmapWithCallback((i * kRegionSize), kRegionSize);
  }
  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    for (uptr i = 0; i < kNumClasses; i++) {
      GetSizeClassInfo(i)->mutex.Lock();
    }
  }
  void ForceUnlock() {
    for (int i = kNumClasses - 1; i >= 0; i--) {
      GetSizeClassInfo(i)->mutex.Unlock();
    }
  }
  // Iterate over all existing chunks.
  // The allocator must be locked when calling this function.
  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
    for (uptr region = 0; region < kNumPossibleRegions; region++)
      if (possible_regions[region]) {
        uptr chunk_size = SizeClassMap::Size(possible_regions[region]);
        uptr max_chunks_in_region = kRegionSize / (chunk_size + kMetadataSize);
        uptr region_beg = region * kRegionSize;
        for (uptr chunk = region_beg;
             chunk < region_beg + max_chunks_in_region * chunk_size;
             chunk += chunk_size) {
          // Too slow: CHECK_EQ((void *)chunk, GetBlockBegin((void *)chunk));
          callback(chunk, arg);
        }
      }
  }
  void PrintStats() {
  }

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;

 private:
  static const uptr kRegionSize = 1 << kRegionSizeLog;
  static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;

  struct SizeClassInfo {
    SpinMutex mutex;
    IntrusiveList<Batch> free_list;
    char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];
  };
  COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);

  uptr ComputeRegionId(uptr mem) {
    uptr res = mem >> kRegionSizeLog;
    CHECK_LT(res, kNumPossibleRegions);
    return res;
  }
  uptr ComputeRegionBeg(uptr mem) {
    return mem & ~(kRegionSize - 1);
  }
  uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
                                      "SizeClassAllocator32"));
    MapUnmapCallback().OnMap(res, kRegionSize);
    stat->Add(AllocatorStatMapped, kRegionSize);
    CHECK_EQ(0U, (res & (kRegionSize - 1)));
    possible_regions.set(ComputeRegionId(res), static_cast<u8>(class_id));
    return res;
  }
  SizeClassInfo *GetSizeClassInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    return &size_class_info_array[class_id];
  }
  void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                        SizeClassInfo *sci, uptr class_id) {
    uptr size = SizeClassMap::Size(class_id);
    uptr reg = AllocateRegion(stat, class_id);
    uptr n_chunks = kRegionSize / (size + kMetadataSize);
    uptr max_count = SizeClassMap::MaxCached(class_id);
    Batch *b = 0;
    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
      if (b == 0) {
        if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
          b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
        else
          b = (Batch*)i;
        b->count = 0;
      }
      b->batch[b->count++] = (void*)i;
      if (b->count == max_count) {
        CHECK_GT(b->count, 0);
        sci->free_list.push_back(b);
        b = 0;
      }
    }
    if (b) {
      CHECK_GT(b->count, 0);
      sci->free_list.push_back(b);
    }
  }

  ByteMap possible_regions;
  SizeClassInfo size_class_info_array[kNumClasses];
};

// Objects of this type should be used as local caches for SizeClassAllocator64
// or SizeClassAllocator32. Since the typical use of this class is to have one
// object per thread in TLS, it has to be POD.
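//
// A rough per-thread usage sketch (the variable names are illustrative): the
// cache lives in TLS and every allocation/deallocation of a primary-owned
// chunk goes through it, e.g.
//   cache.Init(&global_stats);
//   void *p = cache.Allocate(&allocator, allocator.ClassID(size));
//   cache.Deallocate(&allocator, allocator.GetSizeClass(p), p);
//   cache.Destroy(&allocator, &global_stats);  // drains and unregisters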
template<class SizeClassAllocator>
struct SizeClassAllocatorLocalCache {
  typedef SizeClassAllocator Allocator;
  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;

  void Init(AllocatorGlobalStats *s) {
    stats_.Init();
    if (s)
      s->Register(&stats_);
  }
  void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
    Drain(allocator);
    if (s)
      s->Unregister(&stats_);
  }
  void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    stats_.Add(AllocatorStatAllocated, SizeClassMap::Size(class_id));
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == 0))
      Refill(allocator, class_id);
    void *res = c->batch[--c->count];
    PREFETCH(c->batch[c->count - 1]);
    return res;
  }
  void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    // If the first allocator call on a new thread is a deallocation, then
    // max_count will be zero, leading to check failure.
    InitCache();
    stats_.Sub(AllocatorStatAllocated, SizeClassMap::Size(class_id));
    PerClass *c = &per_class_[class_id];
    CHECK_NE(c->max_count, 0UL);
    if (UNLIKELY(c->count == c->max_count))
      Drain(allocator, class_id);
    c->batch[c->count++] = p;
  }
  void Drain(SizeClassAllocator *allocator) {
    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
      PerClass *c = &per_class_[class_id];
      while (c->count > 0)
        Drain(allocator, class_id);
    }
  }

  // private:
  typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
  typedef typename SizeClassMap::TransferBatch Batch;
  struct PerClass {
    uptr count;
    uptr max_count;
    void *batch[2 * SizeClassMap::kMaxNumCached];
  };
  PerClass per_class_[kNumClasses];
  AllocatorStats stats_;

  void InitCache() {
    if (per_class_[1].max_count)
      return;
    for (uptr i = 0; i < kNumClasses; i++) {
      PerClass *c = &per_class_[i];
      c->max_count = 2 * SizeClassMap::MaxCached(i);
    }
  }
  NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b = allocator->AllocateBatch(&stats_, this, class_id);
    CHECK_GT(b->count, 0);
    for (uptr i = 0; i < b->count; i++)
      c->batch[i] = b->batch[i];
    c->count = b->count;
    if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
      Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);
  }
  NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b;
    if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
      b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));
    else
      b = (Batch*)c->batch[0];
    uptr cnt = Min(c->max_count / 2, c->count);
    for (uptr i = 0; i < cnt; i++) {
      b->batch[i] = c->batch[i];
      c->batch[i] = c->batch[i + c->max_count / 2];
    }
    b->count = cnt;
    c->count -= cnt;
    CHECK_GT(b->count, 0);
    allocator->DeallocateBatch(&stats_, class_id, b);
  }
};

// This class can (de)allocate only large chunks of memory using mmap/unmap.
// The main purpose of this allocator is to cover large and rare allocation
// sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
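//
// Layout of a single allocation, as implemented below: the mapping begins
// with one extra page; the Header is stored at the start of the page
// immediately preceding the user pointer (see GetHeader()), and the tool's
// metadata lives right after the Header. For example, with 4K pages a 1-byte
// allocation occupies a 2-page mapping: one header page plus one data page.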
template <class MapUnmapCallback = NoOpMapUnmapCallback>
class LargeMmapAllocator {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
    page_size_ = GetPageSizeCached();
  }
  void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
    CHECK(IsPowerOfTwo(alignment));
    uptr map_size = RoundUpMapSize(size);
    if (alignment > page_size_)
      map_size += alignment;
    if (map_size < size) return AllocatorReturnNull();  // Overflow.
    uptr map_beg = reinterpret_cast<uptr>(
        MmapOrDie(map_size, "LargeMmapAllocator"));
    CHECK(IsAligned(map_beg, page_size_));
    MapUnmapCallback().OnMap(map_beg, map_size);
    uptr map_end = map_beg + map_size;
    uptr res = map_beg + page_size_;
    if (res & (alignment - 1))  // Align.
      res += alignment - (res & (alignment - 1));
    CHECK(IsAligned(res, alignment));
    CHECK(IsAligned(res, page_size_));
    CHECK_GE(res + size, map_beg);
    CHECK_LE(res + size, map_end);
    Header *h = GetHeader(res);
    h->size = size;
    h->map_beg = map_beg;
    h->map_size = map_size;
    uptr size_log = MostSignificantSetBitIndex(map_size);
    CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
    {
      SpinMutexLock l(&mutex_);
      uptr idx = n_chunks_++;
      chunks_sorted_ = false;
      CHECK_LT(idx, kMaxNumChunks);
      h->chunk_idx = idx;
      chunks_[idx] = h;
      stats.n_allocs++;
      stats.currently_allocated += map_size;
      stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
      stats.by_size_log[size_log]++;
      stat->Add(AllocatorStatAllocated, map_size);
      stat->Add(AllocatorStatMapped, map_size);
    }
    return reinterpret_cast<void*>(res);
  }
  void Deallocate(AllocatorStats *stat, void *p) {
    Header *h = GetHeader(p);
    {
      SpinMutexLock l(&mutex_);
      uptr idx = h->chunk_idx;
      CHECK_EQ(chunks_[idx], h);
      CHECK_LT(idx, n_chunks_);
      chunks_[idx] = chunks_[n_chunks_ - 1];
      chunks_[idx]->chunk_idx = idx;
      n_chunks_--;
      chunks_sorted_ = false;
      stats.n_frees++;
      stats.currently_allocated -= h->map_size;
      stat->Sub(AllocatorStatAllocated, h->map_size);
      stat->Sub(AllocatorStatMapped, h->map_size);
    }
    MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
    UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
  }
  uptr TotalMemoryUsed() {
    SpinMutexLock l(&mutex_);
    uptr res = 0;
    for (uptr i = 0; i < n_chunks_; i++) {
      Header *h = chunks_[i];
      CHECK_EQ(h->chunk_idx, i);
      res += RoundUpMapSize(h->size);
    }
    return res;
  }
  bool PointerIsMine(const void *p) {
    return GetBlockBegin(p) != 0;
  }
  uptr GetActuallyAllocatedSize(void *p) {
    return RoundUpTo(GetHeader(p)->size, page_size_);
  }
  // At least page_size_/2 metadata bytes are available.
  void *GetMetaData(const void *p) {
    // Too slow: CHECK_EQ(p, GetBlockBegin(p));
    if (!IsAligned(reinterpret_cast<uptr>(p), page_size_)) {
      Printf("%s: bad pointer %p\n", SanitizerToolName, p);
      CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
    }
    return GetHeader(p) + 1;
  }
  void *GetBlockBegin(const void *ptr) {
    uptr p = reinterpret_cast<uptr>(ptr);
    SpinMutexLock l(&mutex_);
    uptr nearest_chunk = 0;
    // Cache-friendly linear search.
    for (uptr i = 0; i < n_chunks_; i++) {
      uptr ch = reinterpret_cast<uptr>(chunks_[i]);
      if (p < ch) continue;  // p is to the left of this chunk; skip it.
      if (p - ch < p - nearest_chunk)
        nearest_chunk = ch;
    }
    if (!nearest_chunk)
      return 0;
    Header *h = reinterpret_cast<Header *>(nearest_chunk);
    CHECK_GE(nearest_chunk, h->map_beg);
    CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
    CHECK_LE(nearest_chunk, p);
    if (h->map_beg + h->map_size <= p)
      return 0;
    return GetUser(h);
  }
  // This function does the same as GetBlockBegin, but is much faster.
  // Must be called with the allocator locked.
  void *GetBlockBeginFastLocked(void *ptr) {
    mutex_.CheckLocked();
    uptr p = reinterpret_cast<uptr>(ptr);
    uptr n = n_chunks_;
    if (!n) return 0;
    if (!chunks_sorted_) {
      // Do one-time sort. chunks_sorted_ is reset in Allocate/Deallocate.
      SortArray(reinterpret_cast<uptr*>(chunks_), n);
      for (uptr i = 0; i < n; i++)
        chunks_[i]->chunk_idx = i;
      chunks_sorted_ = true;
      min_mmap_ = reinterpret_cast<uptr>(chunks_[0]);
      max_mmap_ = reinterpret_cast<uptr>(chunks_[n - 1]) +
          chunks_[n - 1]->map_size;
    }
    if (p < min_mmap_ || p >= max_mmap_)
      return 0;
    uptr beg = 0, end = n - 1;
    // This loop is a log(n) lower_bound. It does not check for the exact match
    // to avoid expensive cache-thrashing loads.
    while (end - beg >= 2) {
      uptr mid = (beg + end) / 2;  // Invariant: mid >= beg + 1
      if (p < reinterpret_cast<uptr>(chunks_[mid]))
        end = mid - 1;  // We are not interested in chunks_[mid].
      else
        beg = mid;  // chunks_[mid] may still be what we want.
    }
    if (beg < end) {
      CHECK_EQ(beg + 1, end);
      // There are 2 chunks left, choose one.
      if (p >= reinterpret_cast<uptr>(chunks_[end]))
        beg = end;
    }
    Header *h = chunks_[beg];
    if (h->map_beg + h->map_size <= p || p < h->map_beg)
      return 0;
    return GetUser(h);
  }
  void PrintStats() {
    Printf("Stats: LargeMmapAllocator: allocated %zd times, "
           "remains %zd (%zd K) max %zd M; by size logs: ",
           stats.n_allocs, stats.n_allocs - stats.n_frees,
           stats.currently_allocated >> 10, stats.max_allocated >> 20);
    for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
      uptr c = stats.by_size_log[i];
      if (!c) continue;
      Printf("%zd:%zd; ", i, c);
    }
    Printf("\n");
  }
  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    mutex_.Lock();
  }
  void ForceUnlock() {
    mutex_.Unlock();
  }
  // Iterate over all existing chunks.
  // The allocator must be locked when calling this function.
  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
    for (uptr i = 0; i < n_chunks_; i++)
      callback(reinterpret_cast<uptr>(GetUser(chunks_[i])), arg);
  }

 private:
  static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
  struct Header {
    uptr map_beg;
    uptr map_size;
    uptr size;
    uptr chunk_idx;
  };

  Header *GetHeader(uptr p) {
    CHECK(IsAligned(p, page_size_));
    return reinterpret_cast<Header*>(p - page_size_);
  }
  Header *GetHeader(const void *p) {
    return GetHeader(reinterpret_cast<uptr>(p));
  }
  void *GetUser(Header *h) {
    CHECK(IsAligned((uptr)h, page_size_));
    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
  }
  uptr RoundUpMapSize(uptr size) {
    return RoundUpTo(size, page_size_) + page_size_;
  }

  uptr page_size_;
  Header *chunks_[kMaxNumChunks];
  uptr n_chunks_;
  uptr min_mmap_, max_mmap_;
  bool chunks_sorted_;
  struct Stats {
    uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
  } stats;
  SpinMutex mutex_;
};

// This class implements a complete memory allocator by using two
// internal allocators:
// PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
// When allocating 2^x bytes it should return 2^x aligned chunk.
// PrimaryAllocator is used via a local AllocatorCache.
// SecondaryAllocator can allocate anything, but is not efficient.
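//
// A sketch of how the three template parameters are typically wired together
// (PrimaryAllocator here stands for a typedef like the illustrative ones
// shown earlier in this file):
//   typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
//   typedef LargeMmapAllocator<> SecondaryAllocator;
//   typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
//                             SecondaryAllocator> Allocator;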
template <class PrimaryAllocator, class AllocatorCache,
          class SecondaryAllocator>  // NOLINT
class CombinedAllocator {
 public:
  void Init() {
    primary_.Init();
    secondary_.Init();
    stats_.Init();
  }
  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
                 bool cleared = false) {
    // Returning 0 on malloc(0) may break a lot of code.
    if (size == 0)
      size = 1;
    if (size + alignment < size)
      return AllocatorReturnNull();
    if (alignment > 8)
      size = RoundUpTo(size, alignment);
    void *res;
    bool from_primary = primary_.CanAllocate(size, alignment);
    if (from_primary)
      res = cache->Allocate(&primary_, primary_.ClassID(size));
    else
      res = secondary_.Allocate(&stats_, size, alignment);
    if (alignment > 8)
      CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
    if (cleared && res && from_primary)
      internal_bzero_aligned16(res, RoundUpTo(size, 16));
    return res;
  }
  void Deallocate(AllocatorCache *cache, void *p) {
    if (!p) return;
    if (primary_.PointerIsMine(p))
      cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
    else
      secondary_.Deallocate(&stats_, p);
  }
  void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
                   uptr alignment) {
    if (!p)
      return Allocate(cache, new_size, alignment);
    if (!new_size) {
      Deallocate(cache, p);
      return 0;
    }
    CHECK(PointerIsMine(p));
    uptr old_size = GetActuallyAllocatedSize(p);
    uptr memcpy_size = Min(new_size, old_size);
    void *new_p = Allocate(cache, new_size, alignment);
    if (new_p)
      internal_memcpy(new_p, p, memcpy_size);
    Deallocate(cache, p);
    return new_p;
  }
  bool PointerIsMine(void *p) {
    if (primary_.PointerIsMine(p))
      return true;
    return secondary_.PointerIsMine(p);
  }
  bool FromPrimary(void *p) {
    return primary_.PointerIsMine(p);
  }
  void *GetMetaData(const void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetMetaData(p);
    return secondary_.GetMetaData(p);
  }
  void *GetBlockBegin(const void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetBlockBegin(p);
    return secondary_.GetBlockBegin(p);
  }
  // This function does the same as GetBlockBegin, but is much faster.
  // Must be called with the allocator locked.
  void *GetBlockBeginFastLocked(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetBlockBegin(p);
    return secondary_.GetBlockBeginFastLocked(p);
  }
  uptr GetActuallyAllocatedSize(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetActuallyAllocatedSize(p);
    return secondary_.GetActuallyAllocatedSize(p);
  }
  uptr TotalMemoryUsed() {
    return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
  }
  void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }
  void InitCache(AllocatorCache *cache) {
    cache->Init(&stats_);
  }
  void DestroyCache(AllocatorCache *cache) {
    cache->Destroy(&primary_, &stats_);
  }
  void SwallowCache(AllocatorCache *cache) {
    cache->Drain(&primary_);
  }
  void GetStats(AllocatorStatCounters s) const {
    stats_.Get(s);
  }
  void PrintStats() {
    primary_.PrintStats();
    secondary_.PrintStats();
  }
  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    primary_.ForceLock();
    secondary_.ForceLock();
  }
  void ForceUnlock() {
    secondary_.ForceUnlock();
    primary_.ForceUnlock();
  }
  // Iterate over all existing chunks.
  // The allocator must be locked when calling this function.
  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
    primary_.ForEachChunk(callback, arg);
    secondary_.ForEachChunk(callback, arg);
  }

 private:
  PrimaryAllocator primary_;
  SecondaryAllocator secondary_;
  AllocatorGlobalStats stats_;
};

// Returns true if calloc(size, n) should return 0 due to overflow in size*n.
bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n);

}  // namespace __sanitizer

#endif  // SANITIZER_ALLOCATOR_H