Author: Kostya Kortchinsky
Date: 2020-12-17T10:01:57-08:00
New Revision: 1dbf2c96bce93e0a954806d9bdcafcb702a06672
URL: https://github.com/llvm/llvm-project/commit/1dbf2c96bce93e0a954806d9bdcafcb702a06672
DIFF: https://github.com/llvm/llvm-project/commit/1dbf2c96bce93e0a954806d9bdcafcb702a06672.diff

LOG: [scudo][standalone] Allow the release of smaller sizes

Initially we were avoiding the release of smaller size classes because
it was an expensive operation, particularly on 32-bit platforms: with a
lot of batches, and a lot of blocks per page, it was a lengthy operation
with little result. There have been some improvements to the 32-bit
release since then, and we still have criteria preventing us from
wasting time (e.g., 9x% of the blocks in a size class being free).
Allowing the release of blocks < 128 bytes helps in situations where a
lot of small chunks would not have been reclaimed if not for a forced
reclaiming.

Additionally, change some `CHECK`s to `DCHECK`s and rearrange the code a
bit. I didn't experience any regressions in my benchmarks.

Differential Revision: https://reviews.llvm.org/D93141

Added:
    

Modified:
    compiler-rt/lib/scudo/standalone/primary32.h
    compiler-rt/lib/scudo/standalone/primary64.h
    compiler-rt/lib/scudo/standalone/release.h

Removed:
    


################################################################################
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index 016a96bd2dfa..0db95d2e1f11 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -76,17 +76,12 @@ class SizeClassAllocator32 {
     if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
       Seed = static_cast<u32>(
           Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6));
-    const uptr PageSize = getPageSizeCached();
     for (uptr I = 0; I < NumClasses; I++) {
       SizeClassInfo *Sci = getSizeClassInfo(I);
       Sci->RandState = getRandomU32(&Seed);
       // Sci->MaxRegionIndex is already initialized to 0.
       Sci->MinRegionIndex = NumRegions;
-      // See comment in the 64-bit primary about releasing smaller size classes.
-      Sci->CanRelease = (I != SizeClassMap::BatchClassId) &&
-                        (getSizeByClassId(I) >= (PageSize / 32));
-      if (Sci->CanRelease)
-        Sci->ReleaseInfo.LastReleaseAtNs = Time;
+      Sci->ReleaseInfo.LastReleaseAtNs = Time;
     }
     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
@@ -137,7 +132,7 @@ class SizeClassAllocator32 {
     ScopedLock L(Sci->Mutex);
     Sci->FreeList.push_front(B);
     Sci->Stats.PushedBlocks += B->getCount();
-    if (Sci->CanRelease)
+    if (ClassId != SizeClassMap::BatchClassId)
       releaseToOSMaybe(Sci, ClassId);
   }

@@ -217,6 +212,8 @@ class SizeClassAllocator32 {
   uptr releaseToOS() {
     uptr TotalReleasedBytes = 0;
     for (uptr I = 0; I < NumClasses; I++) {
+      if (I == SizeClassMap::BatchClassId)
+        continue;
       SizeClassInfo *Sci = getSizeClassInfo(I);
       ScopedLock L(Sci->Mutex);
       TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true);
@@ -262,7 +259,6 @@ class SizeClassAllocator32 {
     uptr CurrentRegion;
     uptr CurrentRegionAllocated;
     SizeClassStats Stats;
-    bool CanRelease;
     u32 RandState;
     uptr AllocatedUser;
     // Lowest & highest region index allocated for this size class, to avoid
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index f6c4d8cf8bb0..f9854cbfd4d6 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -80,17 +80,7 @@ class SizeClassAllocator64 {
       Region->RegionBeg = getRegionBaseByClassId(I) +
                           (getRandomModN(&Seed, 16) + 1) * PageSize;
       Region->RandState = getRandomU32(&Seed);
-      // Releasing smaller size classes doesn't necessarily yield to a
-      // meaningful RSS impact: there are more blocks per page, they are
-      // randomized around, and thus pages are less likely to be entirely empty.
-      // On top of this, attempting to release those require more iterations and
-      // memory accesses which ends up being fairly costly. The current lower
-      // limit is mostly arbitrary and based on empirical observations.
-      // TODO(kostyak): make the lower limit a runtime option
-      Region->CanRelease = (I != SizeClassMap::BatchClassId) &&
-                           (getSizeByClassId(I) >= (PageSize / 32));
-      if (Region->CanRelease)
-        Region->ReleaseInfo.LastReleaseAtNs = Time;
+      Region->ReleaseInfo.LastReleaseAtNs = Time;
     }
     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));

@@ -129,7 +119,7 @@ class SizeClassAllocator64 {
     ScopedLock L(Region->Mutex);
     Region->FreeList.push_front(B);
     Region->Stats.PushedBlocks += B->getCount();
-    if (Region->CanRelease)
+    if (ClassId != SizeClassMap::BatchClassId)
       releaseToOSMaybe(Region, ClassId);
   }

@@ -201,6 +191,8 @@ class SizeClassAllocator64 {
   uptr releaseToOS() {
     uptr TotalReleasedBytes = 0;
     for (uptr I = 0; I < NumClasses; I++) {
+      if (I == SizeClassMap::BatchClassId)
+        continue;
       RegionInfo *Region = getRegionInfo(I);
       ScopedLock L(Region->Mutex);
       TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true);
@@ -291,7 +283,6 @@ class SizeClassAllocator64 {
     HybridMutex Mutex;
     SinglyLinkedList<TransferBatch> FreeList;
     RegionStats Stats;
-    bool CanRelease;
     bool Exhausted;
     u32 RandState;
     uptr RegionBeg;
@@ -417,7 +408,7 @@ class SizeClassAllocator64 {
     const uptr BlockSize = getSizeByClassId(ClassId);
     const uptr PageSize = getPageSizeCached();

-    CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
+    DCHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
     const uptr BytesInFreeList =
         Region->AllocatedUser -
         (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize;
diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
index cd9e66d63b36..5c11da2200e9 100644
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -52,20 +52,20 @@ class PackedCounterArray {
   PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion,
                      uptr MaxValue)
       : Regions(NumberOfRegions), NumCounters(CountersPerRegion) {
-    CHECK_GT(Regions, 0);
-    CHECK_GT(NumCounters, 0);
-    CHECK_GT(MaxValue, 0);
+    DCHECK_GT(Regions, 0);
+    DCHECK_GT(NumCounters, 0);
+    DCHECK_GT(MaxValue, 0);
     constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL;
     // Rounding counter storage size up to the power of two allows for using
     // bit shifts calculating particular counter's Index and offset.
     const uptr CounterSizeBits =
         roundUpToPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1);
-    CHECK_LE(CounterSizeBits, MaxCounterBits);
+    DCHECK_LE(CounterSizeBits, MaxCounterBits);
     CounterSizeBitsLog = getLog2(CounterSizeBits);
     CounterMask = ~(static_cast<uptr>(0)) >> (MaxCounterBits - CounterSizeBits);

     const uptr PackingRatio = MaxCounterBits >> CounterSizeBitsLog;
-    CHECK_GT(PackingRatio, 0);
+    DCHECK_GT(PackingRatio, 0);
     PackingRatioLog = getLog2(PackingRatio);
     BitOffsetMask = PackingRatio - 1;

@@ -235,20 +235,14 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
   if (BlockSize <= PageSize && PageSize % BlockSize == 0) {
     // Each chunk affects one page only.
     for (const auto &It : FreeList) {
-      // If dealing with a TransferBatch, the first pointer of the batch will
-      // point to the batch itself, we do not want to mark this for release as
-      // the batch is in use, so skip the first entry.
-      const bool IsTransferBatch =
-          (It.getCount() != 0) &&
-          (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
-      for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
+      for (u32 I = 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
         // This takes care of P < Base and P >= Base + RoundedSize.
-        if (P < RoundedSize) {
-          const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
-          const uptr PInRegion = P - RegionIndex * RegionSize;
-          Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
-        }
+        if (UNLIKELY(P >= RoundedSize))
+          continue;
+        const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
+        const uptr PInRegion = P - RegionIndex * RegionSize;
+        Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
       }
     }
   } else {
@@ -256,27 +250,23 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
     DCHECK_GE(RegionSize, BlockSize);
     const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize;
     for (const auto &It : FreeList) {
-      // See TransferBatch comment above.
-      const bool IsTransferBatch =
-          (It.getCount() != 0) &&
-          (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
-      for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
+      for (u32 I = 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
         // This takes care of P < Base and P >= Base + RoundedSize.
-        if (P < RoundedSize) {
-          const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
-          uptr PInRegion = P - RegionIndex * RegionSize;
-          Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
-                            (PInRegion + BlockSize - 1) >> PageSizeLog);
-          // The last block in a region might straddle a page, so if it's
-          // free, we mark the following "pretend" memory block(s) as free.
-          if (PInRegion == LastBlockInRegion) {
+        if (UNLIKELY(P >= RoundedSize))
+          continue;
+        const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
+        uptr PInRegion = P - RegionIndex * RegionSize;
+        Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
+                          (PInRegion + BlockSize - 1) >> PageSizeLog);
+        // The last block in a region might straddle a page, so if it's
+        // free, we mark the following "pretend" memory block(s) as free.
+        if (PInRegion == LastBlockInRegion) {
+          PInRegion += BlockSize;
+          while (PInRegion < RoundedRegionSize) {
+            Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
+                              (PInRegion + BlockSize - 1) >> PageSizeLog);
             PInRegion += BlockSize;
-            while (PInRegion < RoundedRegionSize) {
-              Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
-                                (PInRegion + BlockSize - 1) >> PageSizeLog);
-              PInRegion += BlockSize;
-            }
           }
         }
       }
@@ -327,7 +317,6 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
           }
         }
         PrevPageBoundary = PageBoundary;
-
        RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage);
      }
    }


_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
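
For context on the 128-byte figure mentioned in the log, here is a minimal,
self-contained sketch (hypothetical class sizes and batch class ID, not
scudo's actual SizeClassMap) contrasting the old gating, which skipped any
class smaller than PageSize / 32 (128 bytes with 4 KiB pages), with the new
gating, which only excludes the batch class and leaves the decision to the
cost heuristics inside releaseToOSMaybe():

#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for illustration; not scudo's actual SizeClassMap.
namespace {
constexpr uint64_t PageSize = 4096;
constexpr uint64_t BatchClassId = 0;
constexpr uint64_t ClassSizes[] = {64, 16, 32, 64, 96, 128, 256, 1024};
constexpr uint64_t NumClasses = sizeof(ClassSizes) / sizeof(ClassSizes[0]);

uint64_t getSizeByClassId(uint64_t ClassId) { return ClassSizes[ClassId]; }

// Old gating: classes smaller than PageSize / 32 (128 bytes here) were never
// considered for release, even on a forced releaseToOS().
bool canReleaseOld(uint64_t ClassId) {
  return ClassId != BatchClassId && getSizeByClassId(ClassId) >= PageSize / 32;
}

// New gating: only the batch class is excluded; whether pages actually get
// released is left to the heuristics in releaseToOSMaybe().
bool canReleaseNew(uint64_t ClassId) { return ClassId != BatchClassId; }
} // namespace

int main() {
  for (uint64_t I = 0; I < NumClasses; I++)
    std::printf("class %llu (size %4llu): old=%d new=%d\n",
                static_cast<unsigned long long>(I),
                static_cast<unsigned long long>(getSizeByClassId(I)),
                canReleaseOld(I), canReleaseNew(I));
  return 0;
}

With the old predicate, the 16/32/64/96-byte classes above would never be
considered for release even when forced; with the new one they are
considered, and the existing cost checks decide whether anything is
actually released.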