GNU bug report logs - #24937
"deleting unused links" GC phase is too slow

Previous Next

Package: guix;

Reported by: ludo <at> gnu.org (Ludovic Courtès)

Date: Sun, 13 Nov 2016 17:42:02 UTC

Severity: important

Full log


Message #13 received at 24937 <at> debbugs.gnu.org (full text, mbox):

From: ludo <at> gnu.org (Ludovic Courtès)
To: 24937 <at> debbugs.gnu.org
Cc: Mark H Weaver <mhw <at> netris.org>
Subject: Re: bug#24937: "deleting unused links" GC phase is too slow
Date: Sun, 11 Dec 2016 14:46:13 +0100
[Message part 1 (text/plain, inline)]
Hello!

Here’s a proposed patch that follows your suggestion, Mark, but places
an upper bound on the number of directory entries loaded in memory.

On my laptop, which has ~500k entries in /gnu/store/.links, the result
is something like this (notice the inode numbers in ‘lstat’ calls):

--8<---------------cut here---------------start------------->8---
13738 write(4, "gmlo\0\0\0\0\31\0\0\0\0\0\0\0deleting unused "..., 48) = 48
13738 open("/gnu/store/.links", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 8
13738 fstat(8, {st_dev=makedev(8, 2), st_ino=4014083, st_mode=S_IFDIR|0755, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=119224, st_size=60977152, st_atime=2016/12/11-12:01:59, st_mtime=2016/12/11-09:39:45, st_ctime=2016/12/11-09:39:45}) = 0
13738 getdents(8, {{d_ino=4014083, d_off=4294967296, d_reclen=24, d_name="."}
[...]
13738 lstat("/gnu/store/.links/1f2f3g8waxwymp9sl2slcfyara164i8w1y2sz3h9js2fcviv2rnc", {st_dev=makedev(8, 2), st_ino=47, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=88, st_size=41482, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0
13738 lstat("/gnu/store/.links/1p25kpyfw354in4kykmgh5sy9h925hnil1jdzgxhz7n6abbws8px", {st_dev=makedev(8, 2), st_ino=65, st_mode=S_IFREG|0444, st_nlink=9, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2313, st_atime=2016/12/08-21:02:26, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/11-01:44:49}) = 0
13738 lstat("/gnu/store/.links/163187g637br9ys5pmshb01wjav53bs1g1a83m7c2alpdyx3yqz2", {st_dev=makedev(8, 2), st_ino=68, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=32, st_size=13561, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0
[...]
13738 getdents(8, {{d_ino=4257114, d_off=1734093409898198492,
[...]
13738 lstat("/gnu/store/.links/1m6g06i01ybbkhjjbirjnj7fckw1b772cwygkvbd6v6zgkln7f7m", {st_dev=makedev(8, 2), st_ino=19, st_mode=S_IFREG|0444, st_nlink=4, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2553, st_atime=2016/12/08-21:02:54, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/07-00:05:19}) = 0
13738 lstat("/gnu/store/.links/1ml8n3q55ikn8h60sn67jq1y7z7mvdp5kwr33pqrid2r6kk1d4kb", {st_dev=makedev(8, 2), st_ino=30, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2090, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:21}) = 0
13738 lstat("/gnu/store/.links/1c8cvwlqyyqby3k13cwm40g26pwca5iiz5dcj43xrgn9y91lfvc2", {st_dev=makedev(8, 2), st_ino=33, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=16, st_size=7958, st_atime=2015/11/04-18:55:32, st_mtime=1970/01/01-01:00:01, st_ctime=2016/01/05-11:35:49}) = 0
[...]
13738 getdents(8, {{d_ino=328672,
[...]
13738 lstat("/gnu/store/.links/1l55l59dxb74himmkfzx5v63cv7287i6rjhdns1fdlwajqd73lnz", {st_dev=makedev(8, 2), st_ino=21, st_mode=S_IFREG|0444, st_nlink=31, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=615, st_atime=2016/12/08-21:02:30, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/11-01:44:47}) = 0
13738 lstat("/gnu/store/.links/1c7mm5amw743mb45f1zg4d4r3g549ch35wks9izkcgkx0jirpxsg", {st_dev=makedev(8, 2), st_ino=48, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=360, st_size=176750, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0
13738 lstat("/gnu/store/.links/0z5s7b0yk8mfn1np6gk3cdbmpnjgxg1g0l8vfq1aa01zwp06d3f0", {st_dev=makedev(8, 2), st_ino=61, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=1440, st_atime=2016/11/03-20:37:50, st_mtime=1970/01/01-01:00:01, st_ctime=2016/11/07-00:01:57}) = 0
--8<---------------cut here---------------end--------------->8---

I can’t tell exactly how this affects performance because my machine has
an SSD where this operation takes ~3 seconds on a cold cache.  I suspect
it has performance comparable to that of doing all the ‘readdir’ at once
followed by all the ‘lstat’.

Mark, how does that sound?

I’d like to commit it soon if there are no objections.

Thanks,
Ludo’.

[Message part 2 (text/x-patch, inline)]
diff --git a/nix/libstore/gc.cc b/nix/libstore/gc.cc
index 72eff52..db58603 100644
--- a/nix/libstore/gc.cc
+++ b/nix/libstore/gc.cc
@@ -545,6 +545,9 @@ void LocalStore::tryToDelete(GCState & state, const Path & path)
 }
 
 
+/* Like 'dirent', but with just what we need.  */
+typedef std::pair<Path, ino_t> MiniDirEntry;
+
 /* Unlink all files in /nix/store/.links that have a link count of 1,
    which indicates that there are no other links and so they can be
    safely deleted.  FIXME: race condition with optimisePath(): we
@@ -555,32 +558,57 @@ void LocalStore::removeUnusedLinks(const GCState & state)
     AutoCloseDir dir = opendir(linksDir.c_str());
     if (!dir) throw SysError(format("opening directory `%1%'") % linksDir);
 
+    /* Maximum number of entries stored in memory; each 'MiniDirEntry' takes
+       ~80 bytes.  */
+    const size_t maxEntries = 100000;
+
     long long actualSize = 0, unsharedSize = 0;
 
-    struct dirent * dirent;
-    while (errno = 0, dirent = readdir(dir)) {
-        checkInterrupt();
-        string name = dirent->d_name;
-        if (name == "." || name == "..") continue;
-        Path path = linksDir + "/" + name;
-
-        struct stat st;
-        if (lstat(path.c_str(), &st) == -1)
-            throw SysError(format("statting `%1%'") % path);
-
-        if (st.st_nlink != 1) {
-            unsigned long long size = st.st_blocks * 512ULL;
-            actualSize += size;
-            unsharedSize += (st.st_nlink - 1) * size;
-            continue;
-        }
-
-        printMsg(lvlTalkative, format("deleting unused link `%1%'") % path);
-
-        if (unlink(path.c_str()) == -1)
-            throw SysError(format("deleting `%1%'") % path);
-
-        state.results.bytesFreed += st.st_blocks * 512;
+    bool endOfDir = false;
+
+    while (!endOfDir) {
+	/* Read as many entries as possible at once, up to 'maxEntries'.  */
+	std::list<MiniDirEntry> entries;
+	struct dirent * dirent;
+	while (errno = 0,
+	       (entries.size() < maxEntries) && (dirent = readdir(dir))) {
+	    checkInterrupt();
+	    string name = dirent->d_name;
+	    if (name == "." || name == "..") continue;
+	    entries.emplace_back(MiniDirEntry(name, dirent->d_ino));
+	}
+
+	endOfDir = (dirent == NULL);
+
+	/* Sort entries by inode number to minimize disk seeks induced by the
+	   following 'lstat' calls.  */
+	entries.sort([] (const MiniDirEntry& e1, const MiniDirEntry& e2) {
+		return e1.second < e2.second;
+	    });
+
+	for (auto && item: entries) {
+	    checkInterrupt();
+
+	    Path path = linksDir + "/" + item.first;
+
+	    struct stat st;
+	    if (lstat(path.c_str(), &st) == -1)
+		throw SysError(format("statting `%1%'") % path);
+
+	    if (st.st_nlink != 1) {
+		unsigned long long size = st.st_blocks * 512ULL;
+		actualSize += size;
+		unsharedSize += (st.st_nlink - 1) * size;
+		continue;
+	    }
+
x+	    printMsg(lvlTalkative, format("deleting unused link `%1%'") % path);
+
+	    if (unlink(path.c_str()) == -1)
+		throw SysError(format("deleting `%1%'") % path);
+
+	    state.results.bytesFreed += st.st_blocks * 512;
+	}
     }
 
     struct stat st;

This bug report was last modified 3 years and 203 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.