GNU bug report logs - #78051
[WIP] services: root-file-system: In 'stop' method, find and kill processes that are writing to our filesystems, and then umount the filesystems.

Previous Next

Package: guix-patches;

Reported by: Danny Milosavljevic <dannym <at> friendly-machines.com>

Date: Thu, 24 Apr 2025 23:04:02 UTC

Severity: normal

Full log


View this message in rfc822 format

From: Danny Milosavljevic <dannym <at> friendly-machines.com>
To: 78051 <at> debbugs.gnu.org
Cc: Danny Milosavljevic <dannym <at> friendly-machines.com>
Subject: [bug#78051] [WIP v6] services: root-file-system: In 'stop' method, find and kill processes that are writing to our filesystems, and then umount the filesystems.
Date: Sat, 26 Apr 2025 12:05:24 +0200
* gnu/services/base.scm (%root-file-system-shepherd-service): In 'stop'
method, find and kill processes that are writing to our filesystems, and then
umount the filesystems.

Change-Id: Ib0ffff2257dca5fff3df99fea2d5de81a9612336
---
 gnu/services/base.scm | 2860 +++++++++++++++++++++++------------------
 1 file changed, 1627 insertions(+), 1233 deletions(-)

diff --git a/gnu/services/base.scm b/gnu/services/base.scm
index 8c6563c..22168a3 100644
--- a/gnu/services/base.scm
+++ b/gnu/services/base.scm
@@ -61,15 +61,15 @@ (define-module (gnu services base)
   #:use-module (gnu packages admin)
   #:use-module ((gnu packages linux)
                 #:select (alsa-utils btrfs-progs crda eudev
-                          e2fsprogs f2fs-tools fuse gpm kbd lvm2 rng-tools
-                          util-linux xfsprogs))
+                                     e2fsprogs f2fs-tools fuse gpm kbd lvm2 rng-tools
+                                     util-linux xfsprogs))
   #:use-module (gnu packages bash)
   #:use-module ((gnu packages base)
                 #:select (coreutils glibc glibc/hurd
-                          glibc-utf8-locales
-                          libc-utf8-locales-for-target
-                          make-glibc-utf8-locales
-                          tar canonical-package))
+                                    glibc-utf8-locales
+                                    libc-utf8-locales-for-target
+                                    make-glibc-utf8-locales
+                                    tar canonical-package))
   #:use-module ((gnu packages cross-base)
                 #:select (cross-libc))
   #:use-module ((gnu packages compression) #:select (gzip))
@@ -346,12 +346,373 @@ (define %root-file-system-shepherd-service
   (shepherd-service
    (documentation "Take care of the root file system.")
    (provision '(root-file-system))
+   ;; Is it possible to have (gnu build linux-boot) loaded already?
+   ;; In that case, I'd like to move a lot of stuff there.
+   (modules '((ice-9 textual-ports)
+              (ice-9 control)
+              (ice-9 string-fun)
+              (ice-9 match)
+              (ice-9 ftw) ; scandir
+              (ice-9 rdelim)
+              (srfi srfi-1)        ; filter, for-each, find.
+              (srfi srfi-26)       ; cut
+              (ice-9 exceptions))) ; guard
+              ; TODO (guix build syscalls)
    (start #~(const #t))
    (stop #~(lambda _
-             ;; Return #f if successfully stopped.
+             ;;; Return #f if successfully stopped.
+
+             ;;; Beginning of inlined module (fuser)
+
+             (define log (make-parameter (lambda args
+                                           (apply format (current-error-port) args))))
+             (define *proc-dir-name* "/proc")
+             (define *default-silent-errors*
+               (list ENOENT ESRCH))
+
+             (define* (call-with-safe-syscall thunk
+                                              #:key
+                                              (on-error #f)
+                                              (silent-errors *default-silent-errors*)
+                                              (error-message-format #f)
+                                              (error-context '()))
+               "Call THUNK, handling system errors:
+- If ERROR-MESSAGE-FORMAT and the error is not in SILENT-ERRORS, calls format
+with ERROR-MESSAGE-FORMAT and ERROR-CONTEXT and (strerror errno) as arguments.
+- Return ON-ERROR on error."
+               (catch 'system-error
+                      thunk
+                      (lambda args
+                        (let ((errno (system-error-errno args)))
+                          (unless (member errno silent-errors)
+                            (when error-message-format
+                              (apply (log)
+                                     error-message-format
+                                     (append
+                                      error-context
+                                      (list (strerror errno))))))
+                          on-error))))
+
+             (define (safe-stat path)
+               "Get stat info for PATH--or #f if not possible."
+               (call-with-safe-syscall (lambda () (stat path))
+                                       #:error-message-format "Error: Cannot stat ~s: ~a~%"
+                                       #:error-context (list path)
+                                       #:silent-errors '()
+                                       #:on-error #f))
+
+             (define (safe-umount path) ; TODO: UMOUNT_NOFOLLOW ?
+               "Umount PATH--if possible.."
+               (call-with-safe-syscall (lambda () (umount path))
+                                       #:error-message-format "Error: Cannot umount ~s: ~a~%"
+                                       #:error-context (list path)
+                                       #:silent-errors '()
+                                       #:on-error 'error))
+
+             (define (safe-lstat path)
+               "Get lstat info for PATH--or #f if not possible."
+               (call-with-safe-syscall (lambda () (lstat path))
+                                       #:error-message-format "Error: Cannot lstat ~s: ~a~%"
+                                       #:error-context (list path)
+                                       #:on-error #f))
+
+             (define (safe-scandir path)
+               "scandir PATH--or #f if not possible."
+               (let ((result (scandir path)))
+                 (if result
+                     result
+                     (begin
+                       ((log) "Error: Cannot scandir ~s: ?~%" path)
+                       '()))))
+
+;;; Processes
+
+             (define (safe-get-fd-flags pid fd)
+               "Get flags for FD in PID--or #f if not possible."
+               (let ((fdinfo-path (format #f "~a/~a/fdinfo/~a" *proc-dir-name* pid fd)))
+                 (call-with-safe-syscall (lambda ()
+                                           (call-with-input-file fdinfo-path
+                                             (lambda (port)
+                                               ;; Find 'flags:' line and parse octal value
+                                               (let loop ()
+                                                 (let ((line (get-line port)))
+                                                   (cond ((eof-object? line) #f)
+                                                         ((string-prefix? "flags:\t" line)
+                                                          (match (string-split line #\tab)
+                                                            ((_ flags-str)
+                                                             (catch 'invalid-argument
+                                                                    (lambda ()
+                                                                      (string->number flags-str 8))
+                                                                    (lambda args
+                                                                      #f)))
+                                                            (_ #f)))
+                                                         (else (loop))))))))
+                                         #:error-message-format "Error: Cannot read ~s: ~a~%"
+                                         #:error-context (list fdinfo-path)
+                                         #:on-error #f)))
+
+             (define (safe-get-processes)
+               "Get a list of all PIDs from proc--or #f if not possible."
+               (let ((proc-dir *proc-dir-name*))
+                 (catch 'system-error
+                        (lambda ()
+                          ;; Keep only numbers.
+                          (filter-map string->number (safe-scandir proc-dir)))
+                        ;; FIXME is errno even useful?
+                        (lambda scan-err
+                          ((log) "Error scanning ~s: ~a~%"
+                           proc-dir (strerror (system-error-errno scan-err)))
+                          '()))))
+
+             (define (safe-fd-on-device? pid fd target-device)
+               "Return whether fd FD on pid PID is on device TARGET-DEVICE."
+               (let* ((fd-path (readlink (format #f "~a/~a/fd/~a" *proc-dir-name* pid fd)))
+                      (stat (safe-lstat fd-path)))
+                 (and stat (eqv? (stat:dev stat)
+                                 target-device))))
+
+             (define (safe-get-process-fds pid)
+               "Get a list of all FDs of PID from proc--or #f if not possible."
+               (let ((fd-dir (format #f "~a/~a/fd" *proc-dir-name* pid)))
+                 ;; Keep only numbers.
+                 (filter-map string->number (safe-scandir fd-dir))))
+
+             (define (filter-process-fd-flags pid fds predicate)
+               "Get FLAGS from proc for PID and call PREDICATE with (FD FLAGS) each."
+               (filter (lambda (fd)
+                         (predicate fd (safe-get-fd-flags pid fd)))
+                       fds))
+
+             (define (safe-get-process-command pid)
+               "Return command of process PID--or #f if not possible."
+               (let ((cmdline-path (format #f "~a/~a/cmdline" *proc-dir-name* pid)))
+                 (call-with-safe-syscall (lambda ()
+                                           (call-with-input-file cmdline-path
+                                             (lambda (port)
+                                               (let ((full-cmdline (get-string-all port)))
+                                                 (match (string-split full-cmdline #\nul)
+                                                   ((command-name . _) command-name))))))
+                                         #:error-message-format "Error: Cannot read ~s: ~a~%"
+                                         #:error-context (list cmdline-path)
+                                         #:on-error #f)))
+
+             (define (safe-kill-process pid kill-signal)
+               "Kill process PID with KILL-SIGNAL if possible."
+               (call-with-safe-syscall (lambda ()
+                                         (kill pid kill-signal)
+                                         #t)
+                                       #:on-error 'error
+                                       #:silent-errors '()
+                                       #:error-message-format
+                                       "Error: Failed to kill process ~a: ~a~%"
+                                       #:error-context '()))
+
+;;; Mounts
+
+             (define (safe-get-device mount-point)
+               "Get the device ID (st_dev) of MOUNT-POINT--or #f if not possible."
+               (and=>
+                (safe-stat mount-point) ; TODO: lstat? Is that safe?
+                stat:dev))
+
+             (define (safe-parse-mountinfo path)
+               "Read and parse /proc/self/mountinfo (or specified path).
+Return a list of parsed entries, where each entry is:
+(list mount-id parent-id mount-point-string)
+Return '() on file read error or if file is unparseable."
+               (call-with-safe-syscall ; TODO: call-with-input-file is not actually a syscall.
+                (lambda ()
+                  (let ((entries '()))
+                    (call-with-input-file path
+                      (lambda (port)
+                        (let loop ()
+                          (let ((line (get-line port)))
+                            (unless (eof-object? line)
+                              (match (string-split line #\space)
+                                ((mount-id-str parent-id-str major-minor root mount-point rest ...)
+                                 ;; Attempt to parse IDs, skip line on error
+                                 (catch 'invalid-argument
+                                        (lambda ()
+                                          (let ((mount-id (string->number mount-id-str))
+                                                (parent-id (string->number parent-id-str)))
+                                            ;; Add successfully parsed entry to list
+                                            (set! entries (cons (list mount-id parent-id mount-point)
+                                                                entries))
+                                            (loop)))
+                                        (lambda args
+                                          ((log)
+                                           "Warning: Skipping mountinfo line due to parse error: ~s (~a)~%"
+                                           line args)
+                                          (loop))))
+                                (x (begin
+                                     ((log) "Warning: Skipping mountinfo line: %s" x)
+                                     (loop)))))))))
+                    ;; Return parsed entries in file order
+                    (reverse entries)))
+                #:error-message-format "Error: Cannot read or parse mountinfo file ~s: ~a"
+                #:error-context (list path)
+                #:on-error '(error)))
+
+             (define (safe-find-nested-mounts root-mount-point target-device)
+               "Find mount points that block the unmounting of ROOT-MOUNT-POINT.
+TARGET-DEVICE argument is ignored.
+Mountpoints are returned depth-first (in the order they can be unmounted).
+ROOT-MOUNT-POINT is included."
+               (let* ((mountinfo (safe-parse-mountinfo (format #f "~a/self/mountinfo" *proc-dir-name*))))
+                 (define (safe-find-mounts-via-mountinfo accumulator lives root-mount-point)
+                   (if (member root-mount-point accumulator)
+                       ((log) "Cycle detected~%"))
+                   (let ((accumulator (cons root-mount-point accumulator)))
+                     (if (= lives 0)
+                         (begin
+                           ((log) "Error: Recursive mountpoints too deep.~%")
+                           accumulator)
+                         (let ((root-entry (find (lambda (entry)
+                                                   (match entry
+                                                     ((_ _ mp) (string=? mp root-mount-point))
+                                                     (_ #f))) ; Should not happen
+                                                 mountinfo)))
+                           (if root-entry
+                               (let ((root-mount-id (car root-entry)))
+                                 (fold (lambda (entry accumulator)
+                                         (match entry
+                                           ((_ parent-id mp)
+                                            (if (= parent-id root-mount-id)
+                                                (safe-find-mounts-via-mountinfo accumulator
+                                                                                (- lives 1)
+                                                                                mp)
+                                                accumulator))
+                                           (_ accumulator)))
+                                       accumulator
+                                       mountinfo))
+                               (begin
+                                 ((log) "Error: Could not find mount ID for ~s in parsed mountinfo~%"
+                                  root-mount-point)
+                                 accumulator))))))
+                 (safe-find-mounts-via-mountinfo '() 100 root-mount-point)))
+
+             ;;; End of inlined module (fuser)
+
+             (define *root-mount-point* "/")
+
+             (define O_ACCMODE #o0003)
+
+             (define (flags-has-write-access? flags)
+               "Given open FLAGS, return whether it (probably) signifies write access."
+               (and flags (not (= (logand flags O_ACCMODE)
+                                  O_RDONLY))))
+
+             (define (kill-process? pid command)
+               "Return whether to kill process with id PID (and command COMMAND)"
+               ((log) "~%Process Found: PID ~a  Command: ~s~%" pid command)
+               #t
+               ;((log) "Kill process ~a? [y/N] " pid)
+               ;(force-output (current-error-port))
+               ;(let ((response (read-char (current-input-port))))
+               ;  (if (not (eof-object? response))
+               ;      ;; Consume rest of line.
+               ;      (read-line (current-input-port)))
+               ;  (or (eqv? response #\y)
+               ;      (eqv? response #\Y)))
+                     )
+
              (sync)
 
-             (let ((null (%make-void-port "w")))
+             (let* ((null (%make-void-port "w"))
+                    (call-with-io-file (lambda (file-name proc)
+                                         (let ((port (open file-name O_RDWR)))
+                                           (set-current-input-port port)
+                                           (set-current-output-port port)
+                                           (set-current-error-port port)
+                                           (catch #t (lambda ()
+                                                       (proc)
+                                                       (set-current-input-port null)
+                                                       (set-current-output-port null)
+                                                       (set-current-error-port null)
+                                                       (close port))
+                                                  (lambda args
+                                                    (set-current-input-port null)
+                                                    (set-current-output-port null)
+                                                    (set-current-error-port null)
+                                                    (close port)))))))
+               (let-syntax ((with-mounted-filesystem (syntax-rules ()
+                                               ((_ source mountpoint file-system-type flags options exp ...)
+                                                (call-with-mounted-filesystem source mountpoint file-system-type flags options
+                                                                              (lambda () (begin exp ...)))))))
+
+               (define (call-with-logging thunk)
+                 (mkdir-p "/proc")
+                 (mkdir-p "/dev")
+                 (with-mounted-filesystem "none" "/proc" "proc" 0 #f ; TODO: MS_NODEV, MS_NOEXEC, MS_NOSUID
+                   (with-mounted-filesystem "none" "/dev" "devtmpfs" 0 #f ; TODO: MS_NOEXEC, MS_NOSUID
+                     (catch 'system-error
+                       (lambda ()
+                         (mknod "/dev/console" 'char-special #o600 (+ (* 5 256) 1)))
+                         (const #f))
+                     (catch 'system-error
+                       (lambda ()
+                         (mknod "/dev/tty0" 'char-special #o600 (+ (* 4 256) 0)))
+                         (const #f))
+                     (call-with-io-file "/dev/console" ; TODO: /dev/console after we set it up using vt-set-as-console at boot or something (see plymouth).
+                      (lambda ()
+                        ;(vt-activate (current-input-port) 12)
+                        (thunk))))))
+
+               (define (get-clean-ups)
+                 ;; We rarely (or ever) log--and if we did have a logger
+                 ;; at all times, we'd show up on our own shitlist.
+                 ;; So: open logger, log, close logger--on every message.
+                 (parameterize ((log (lambda args
+                                       (call-with-logging
+                                        (lambda ()
+                                          (format (current-error-port) args))))))
+                   (let* ((root-device (safe-get-device *root-mount-point*))
+                          (mounts (safe-find-nested-mounts *root-mount-point* root-device))
+                          (mount-devices (map safe-get-device mounts)))
+                     (let* ((our-pid (getpid))
+                            (pids (filter (lambda (pid)
+                                            (not (= pid our-pid)))
+                                          (safe-get-processes)))
+                            (pids (filter (lambda (pid)
+                                            (match (filter-process-fd-flags pid
+                                                    (safe-get-process-fds pid)
+                                                    (lambda (fd flags)
+                                                      (and (flags-has-write-access? flags)
+                                                           (find (lambda (target-device)
+                                                                   (safe-fd-on-device? pid fd target-device))
+                                                                 mount-devices))))
+                                              ((x . _) #t)
+                                              (_ #f)))
+                                          pids)))
+                       (list pids mounts mount-devices)))))
+
+               (define (call-with-mounted-filesystem source mountpoint file-system-type flags options proc)
+                 (mount source mountpoint file-system-type flags options #:update-mtab? #f)
+                 (catch #t
+                        (lambda ()
+                          (proc)
+                          (umount mountpoint))
+                        (lambda args
+                          (umount mountpoint))))
+
+               ;; This will also take care of setting up a logger for the
+               ;; entire runtime of the function.
+               (define (kill-processes pids mounts mount-devices signal)
+                 (call-with-logging
+                  (lambda ()
+                    (parameterize ((log (lambda args
+                                           (apply format (current-error-port) args))))
+                      (let ((error-port (current-error-port)))
+                        (format error-port "Searched for processes writing on devices ~s (mount points ~s)...~%" mount-devices mounts)
+                        (format error-port "Found ~a process(es) matching the criteria.~%" (length pids))
+                        (for-each (lambda (pid)
+                                    (let ((command (safe-get-process-command pid)))
+                                      (if (kill-process? pid command)
+                                          (safe-kill-process pid signal)
+                                          (format error-port "Skipping PID ~a (~s).~%" pid command))))
+                                  pids)
+                        (format error-port "~%Process scan complete.~%"))))))
+
                ;; Redirect the default output ports.
                (set-current-output-port null)
                (set-current-error-port null)
@@ -363,12 +724,45 @@ (define %root-file-system-shepherd-service
                ;; root file system can be re-mounted read-only.
                (let loop ((n 10))
                  (unless (catch 'system-error
-                           (lambda ()
-                             (mount #f "/" #f
-                                    (logior MS_REMOUNT MS_RDONLY)
-                                    #:update-mtab? #f)
-                             #t)
-                           (const #f))
+                                (lambda ()
+                                  (mount #f "/" #f
+                                         (logior MS_REMOUNT MS_RDONLY)
+                                         #:update-mtab? #f)
+                                  #t)
+                                (const #f))
+                   (when (zero? n)
+                     ;; 1. Send SIGTERM to all writing processes (if any)
+                     (match (get-clean-ups)
+                       ((pids mounts mount-devices)
+                        (when (> (length pids) 0)
+                          (kill-processes pids mounts mount-devices SIGTERM)
+                          ((@ (fibers) sleep) 5))))
+
+                     ;; 2. Send SIGKILL to all writing processes
+                     (match (get-clean-ups)
+                       ((pids mounts mount-devices)
+                        (when (> (length pids) 0)
+                          (kill-processes pids mounts mount-devices SIGKILL)
+                          ((@ (fibers) sleep) 5))
+
+                        ;; 3. Unmount filesystems
+                        (for-each safe-umount mounts)))
+
+                     ;; Should have been unmounted already--but we are paranoid
+                     ;; (and possibly were blocking ourselves anyway).
+                     (catch 'system-error
+                            (lambda ()
+                              (mount #f "/" #f
+                                     (logior MS_REMOUNT MS_RDONLY)
+                                     #:update-mtab? #f)
+                              ((@ (fibers) sleep) 5) ; just in case
+                              #t)
+                            (lambda args
+                              ((log) "failed to remount / ro %s %~" args)
+                              (let loopity ((q 0))
+                                ((log) "user, do something!~%")
+                                ((@ (fibers) sleep) 1)
+                                (loopity (+ q 1)))))))
                    (unless (zero? n)
                      ;; Yield to the other fibers.  That gives logging fibers
                      ;; an opportunity to close log files so the 'mount' call





This bug report was last modified 54 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.