Package: guix;
Reported by: ludo <at> gnu.org (Ludovic Courtès)
Date: Sun, 13 Nov 2016 17:42:02 UTC
Severity: important
View this message in rfc822 format
From: ludo <at> gnu.org (Ludovic Courtès) To: 24937 <at> debbugs.gnu.org Cc: Mark H Weaver <mhw <at> netris.org> Subject: bug#24937: "deleting unused links" GC phase is too slow Date: Sun, 11 Dec 2016 14:46:13 +0100
[Message part 1 (text/plain, inline)]
Hello! Here’s a proposed patch that follows your suggestion, Mark, but places an upper bound on the number of directory entries loaded in memory. On my laptop, which has ~500k entries in /gnu/store/.links, the result is something like this (notice the inode numbers in ‘lstat’ calls): --8<---------------cut here---------------start------------->8--- 13738 write(4, "gmlo\0\0\0\0\31\0\0\0\0\0\0\0deleting unused "..., 48) = 48 13738 open("/gnu/store/.links", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 8 13738 fstat(8, {st_dev=makedev(8, 2), st_ino=4014083, st_mode=S_IFDIR|0755, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=119224, st_size=60977152, st_atime=2016/12/11-12:01:59, st_mtime=2016/12/11-09:39:45, st_ctime=2016/12/11-09:39:45}) = 0 13738 getdents(8, {{d_ino=4014083, d_off=4294967296, d_reclen=24, d_name="."} [...] 13738 lstat("/gnu/store/.links/1f2f3g8waxwymp9sl2slcfyara164i8w1y2sz3h9js2fcviv2rnc", {st_dev=makedev(8, 2), st_ino=47, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=88, st_size=41482, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0 13738 lstat("/gnu/store/.links/1p25kpyfw354in4kykmgh5sy9h925hnil1jdzgxhz7n6abbws8px", {st_dev=makedev(8, 2), st_ino=65, st_mode=S_IFREG|0444, st_nlink=9, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2313, st_atime=2016/12/08-21:02:26, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/11-01:44:49}) = 0 13738 lstat("/gnu/store/.links/163187g637br9ys5pmshb01wjav53bs1g1a83m7c2alpdyx3yqz2", {st_dev=makedev(8, 2), st_ino=68, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=32, st_size=13561, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0 [...] 13738 getdents(8, {{d_ino=4257114, d_off=1734093409898198492, [...] 13738 lstat("/gnu/store/.links/1m6g06i01ybbkhjjbirjnj7fckw1b772cwygkvbd6v6zgkln7f7m", {st_dev=makedev(8, 2), st_ino=19, st_mode=S_IFREG|0444, st_nlink=4, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2553, st_atime=2016/12/08-21:02:54, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/07-00:05:19}) = 0 13738 lstat("/gnu/store/.links/1ml8n3q55ikn8h60sn67jq1y7z7mvdp5kwr33pqrid2r6kk1d4kb", {st_dev=makedev(8, 2), st_ino=30, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2090, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:21}) = 0 13738 lstat("/gnu/store/.links/1c8cvwlqyyqby3k13cwm40g26pwca5iiz5dcj43xrgn9y91lfvc2", {st_dev=makedev(8, 2), st_ino=33, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=16, st_size=7958, st_atime=2015/11/04-18:55:32, st_mtime=1970/01/01-01:00:01, st_ctime=2016/01/05-11:35:49}) = 0 [...] 13738 getdents(8, {{d_ino=328672, [...] 13738 lstat("/gnu/store/.links/1l55l59dxb74himmkfzx5v63cv7287i6rjhdns1fdlwajqd73lnz", {st_dev=makedev(8, 2), st_ino=21, st_mode=S_IFREG|0444, st_nlink=31, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=615, st_atime=2016/12/08-21:02:30, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/11-01:44:47}) = 0 13738 lstat("/gnu/store/.links/1c7mm5amw743mb45f1zg4d4r3g549ch35wks9izkcgkx0jirpxsg", {st_dev=makedev(8, 2), st_ino=48, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=360, st_size=176750, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0 13738 lstat("/gnu/store/.links/0z5s7b0yk8mfn1np6gk3cdbmpnjgxg1g0l8vfq1aa01zwp06d3f0", {st_dev=makedev(8, 2), st_ino=61, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=1440, st_atime=2016/11/03-20:37:50, st_mtime=1970/01/01-01:00:01, st_ctime=2016/11/07-00:01:57}) = 0 --8<---------------cut here---------------end--------------->8--- I can’t tell exactly how this affects performance because my machine has an SSD where this operation takes ~3 seconds on a cold cache. I suspect it has performance comparable to that of doing all the ‘readdir’ at once followed by all the ‘lstat’. Mark, how does that sound? I’d like to commit it soon if there are no objections. Thanks, Ludo’.
[Message part 2 (text/x-patch, inline)]
diff --git a/nix/libstore/gc.cc b/nix/libstore/gc.cc index 72eff52..db58603 100644 --- a/nix/libstore/gc.cc +++ b/nix/libstore/gc.cc @@ -545,6 +545,9 @@ void LocalStore::tryToDelete(GCState & state, const Path & path) } +/* Like 'dirent', but with just what we need. */ +typedef std::pair<Path, ino_t> MiniDirEntry; + /* Unlink all files in /nix/store/.links that have a link count of 1, which indicates that there are no other links and so they can be safely deleted. FIXME: race condition with optimisePath(): we @@ -555,32 +558,57 @@ void LocalStore::removeUnusedLinks(const GCState & state) AutoCloseDir dir = opendir(linksDir.c_str()); if (!dir) throw SysError(format("opening directory `%1%'") % linksDir); + /* Maximum number of entries stored in memory; each 'MiniDirEntry' takes + ~80 bytes. */ + const size_t maxEntries = 100000; + long long actualSize = 0, unsharedSize = 0; - struct dirent * dirent; - while (errno = 0, dirent = readdir(dir)) { - checkInterrupt(); - string name = dirent->d_name; - if (name == "." || name == "..") continue; - Path path = linksDir + "/" + name; - - struct stat st; - if (lstat(path.c_str(), &st) == -1) - throw SysError(format("statting `%1%'") % path); - - if (st.st_nlink != 1) { - unsigned long long size = st.st_blocks * 512ULL; - actualSize += size; - unsharedSize += (st.st_nlink - 1) * size; - continue; - } - - printMsg(lvlTalkative, format("deleting unused link `%1%'") % path); - - if (unlink(path.c_str()) == -1) - throw SysError(format("deleting `%1%'") % path); - - state.results.bytesFreed += st.st_blocks * 512; + bool endOfDir = false; + + while (!endOfDir) { + /* Read as many entries as possible at once, up to 'maxEntries'. */ + std::list<MiniDirEntry> entries; + struct dirent * dirent; + while (errno = 0, + (entries.size() < maxEntries) && (dirent = readdir(dir))) { + checkInterrupt(); + string name = dirent->d_name; + if (name == "." || name == "..") continue; + entries.emplace_back(MiniDirEntry(name, dirent->d_ino)); + } + + endOfDir = (dirent == NULL); + + /* Sort entries by inode number to minimize disk seeks induced by the + following 'lstat' calls. */ + entries.sort([] (const MiniDirEntry& e1, const MiniDirEntry& e2) { + return e1.second < e2.second; + }); + + for (auto && item: entries) { + checkInterrupt(); + + Path path = linksDir + "/" + item.first; + + struct stat st; + if (lstat(path.c_str(), &st) == -1) + throw SysError(format("statting `%1%'") % path); + + if (st.st_nlink != 1) { + unsigned long long size = st.st_blocks * 512ULL; + actualSize += size; + unsharedSize += (st.st_nlink - 1) * size; + continue; + } + x+ printMsg(lvlTalkative, format("deleting unused link `%1%'") % path); + + if (unlink(path.c_str()) == -1) + throw SysError(format("deleting `%1%'") % path); + + state.results.bytesFreed += st.st_blocks * 512; + } } struct stat st;
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.