Hello! Here’s a proposed patch that follows your suggestion, Mark, but places an upper bound on the number of directory entries loaded in memory.
On my laptop, which has ~500k entries in /gnu/store/.links, the result is something like this (notice the inode numbers in ‘lstat’ calls): --8<---------------cut here---------------start------------->8--- 13738 write(4, "gmlo\0\0\0\0\31\0\0\0\0\0\0\0deleting unused "..., 48) = 48 13738 open("/gnu/store/.links", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 8 13738 fstat(8, {st_dev=makedev(8, 2), st_ino=4014083, st_mode=S_IFDIR|0755, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=119224, st_size=60977152, st_atime=2016/12/11-12:01:59, st_mtime=2016/12/11-09:39:45, st_ctime=2016/12/11-09:39:45}) = 0 13738 getdents(8, {{d_ino=4014083, d_off=4294967296, d_reclen=24, d_name="."} [...] 13738 lstat("/gnu/store/.links/1f2f3g8waxwymp9sl2slcfyara164i8w1y2sz3h9js2fcviv2rnc", {st_dev=makedev(8, 2), st_ino=47, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=88, st_size=41482, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0 13738 lstat("/gnu/store/.links/1p25kpyfw354in4kykmgh5sy9h925hnil1jdzgxhz7n6abbws8px", {st_dev=makedev(8, 2), st_ino=65, st_mode=S_IFREG|0444, st_nlink=9, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2313, st_atime=2016/12/08-21:02:26, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/11-01:44:49}) = 0 13738 lstat("/gnu/store/.links/163187g637br9ys5pmshb01wjav53bs1g1a83m7c2alpdyx3yqz2", {st_dev=makedev(8, 2), st_ino=68, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=32, st_size=13561, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0 [...] 13738 getdents(8, {{d_ino=4257114, d_off=1734093409898198492, [...] 13738 lstat("/gnu/store/.links/1m6g06i01ybbkhjjbirjnj7fckw1b772cwygkvbd6v6zgkln7f7m", {st_dev=makedev(8, 2), st_ino=19, st_mode=S_IFREG|0444, st_nlink=4, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2553, st_atime=2016/12/08-21:02:54, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/07-00:05:19}) = 0 13738 lstat("/gnu/store/.links/1ml8n3q55ikn8h60sn67jq1y7z7mvdp5kwr33pqrid2r6kk1d4kb", {st_dev=makedev(8, 2), st_ino=30, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2090, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:21}) = 0 13738 lstat("/gnu/store/.links/1c8cvwlqyyqby3k13cwm40g26pwca5iiz5dcj43xrgn9y91lfvc2", {st_dev=makedev(8, 2), st_ino=33, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=16, st_size=7958, st_atime=2015/11/04-18:55:32, st_mtime=1970/01/01-01:00:01, st_ctime=2016/01/05-11:35:49}) = 0 [...] 13738 getdents(8, {{d_ino=328672, [...] 13738 lstat("/gnu/store/.links/1l55l59dxb74himmkfzx5v63cv7287i6rjhdns1fdlwajqd73lnz", {st_dev=makedev(8, 2), st_ino=21, st_mode=S_IFREG|0444, st_nlink=31, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=615, st_atime=2016/12/08-21:02:30, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/11-01:44:47}) = 0 13738 lstat("/gnu/store/.links/1c7mm5amw743mb45f1zg4d4r3g549ch35wks9izkcgkx0jirpxsg", {st_dev=makedev(8, 2), st_ino=48, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=360, st_size=176750, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0 13738 lstat("/gnu/store/.links/0z5s7b0yk8mfn1np6gk3cdbmpnjgxg1g0l8vfq1aa01zwp06d3f0", {st_dev=makedev(8, 2), st_ino=61, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=1440, st_atime=2016/11/03-20:37:50, st_mtime=1970/01/01-01:00:01, st_ctime=2016/11/07-00:01:57}) = 0 --8<---------------cut here---------------end--------------->8--- I can’t tell exactly how this affects performance because my machine has an SSD where this operation takes ~3 seconds on a cold cache. I suspect it has performance comparable to that of doing all the ‘readdir’ at once followed by all the ‘lstat’. Mark, how does that sound? I’d like to commit it soon if there are no objections. Thanks, Ludo’.
diff --git a/nix/libstore/gc.cc b/nix/libstore/gc.cc index 72eff52..db58603 100644 --- a/nix/libstore/gc.cc +++ b/nix/libstore/gc.cc @@ -545,6 +545,9 @@ void LocalStore::tryToDelete(GCState & state, const Path & path) } +/* Like 'dirent', but with just what we need. */ +typedef std::pair<Path, ino_t> MiniDirEntry; + /* Unlink all files in /nix/store/.links that have a link count of 1, which indicates that there are no other links and so they can be safely deleted. FIXME: race condition with optimisePath(): we @@ -555,32 +558,57 @@ void LocalStore::removeUnusedLinks(const GCState & state) AutoCloseDir dir = opendir(linksDir.c_str()); if (!dir) throw SysError(format("opening directory `%1%'") % linksDir); + /* Maximum number of entries stored in memory; each 'MiniDirEntry' takes + ~80 bytes. */ + const size_t maxEntries = 100000; + long long actualSize = 0, unsharedSize = 0; - struct dirent * dirent; - while (errno = 0, dirent = readdir(dir)) { - checkInterrupt(); - string name = dirent->d_name; - if (name == "." || name == "..") continue; - Path path = linksDir + "/" + name; - - struct stat st; - if (lstat(path.c_str(), &st) == -1) - throw SysError(format("statting `%1%'") % path); - - if (st.st_nlink != 1) { - unsigned long long size = st.st_blocks * 512ULL; - actualSize += size; - unsharedSize += (st.st_nlink - 1) * size; - continue; - } - - printMsg(lvlTalkative, format("deleting unused link `%1%'") % path); - - if (unlink(path.c_str()) == -1) - throw SysError(format("deleting `%1%'") % path); - - state.results.bytesFreed += st.st_blocks * 512; + bool endOfDir = false; + + while (!endOfDir) { + /* Read as many entries as possible at once, up to 'maxEntries'. */ + std::list<MiniDirEntry> entries; + struct dirent * dirent; + while (errno = 0, + (entries.size() < maxEntries) && (dirent = readdir(dir))) { + checkInterrupt(); + string name = dirent->d_name; + if (name == "." || name == "..") continue; + entries.emplace_back(MiniDirEntry(name, dirent->d_ino)); + } + + endOfDir = (dirent == NULL); + + /* Sort entries by inode number to minimize disk seeks induced by the + following 'lstat' calls. */ + entries.sort([] (const MiniDirEntry& e1, const MiniDirEntry& e2) { + return e1.second < e2.second; + }); + + for (auto && item: entries) { + checkInterrupt(); + + Path path = linksDir + "/" + item.first; + + struct stat st; + if (lstat(path.c_str(), &st) == -1) + throw SysError(format("statting `%1%'") % path); + + if (st.st_nlink != 1) { + unsigned long long size = st.st_blocks * 512ULL; + actualSize += size; + unsharedSize += (st.st_nlink - 1) * size; + continue; + } + x+ printMsg(lvlTalkative, format("deleting unused link `%1%'") % path); + + if (unlink(path.c_str()) == -1) + throw SysError(format("deleting `%1%'") % path); + + state.results.bytesFreed += st.st_blocks * 512; + } } struct stat st;