Package: guix-patches;
Reported by: Danny Milosavljevic <dannym <at> friendly-machines.com>
Date: Thu, 24 Apr 2025 23:04:02 UTC
Severity: normal
View this message in rfc822 format
From: Danny Milosavljevic <dannym <at> friendly-machines.com> To: 78051 <at> debbugs.gnu.org Cc: Danny Milosavljevic <dannym <at> friendly-machines.com> Subject: [bug#78051] [WIP] services: root-file-system: In 'stop' method, find and kill processes that are writing to our filesystems, and then umount the filesystems. Date: Fri, 25 Apr 2025 01:03:17 +0200
* gnu/services/base.scm (%root-file-system-shepherd-service): In 'stop' method, find and kill processes that are writing to our filesystems, and then umount the filesystems. Change-Id: If244a1594281057ee5b6163e23bcf11fab3968ff --- gnu/services/base.scm | 381 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 367 insertions(+), 14 deletions(-) diff --git a/gnu/services/base.scm b/gnu/services/base.scm index 8c6563c99d..de24d07b4e 100644 --- a/gnu/services/base.scm +++ b/gnu/services/base.scm @@ -348,10 +348,337 @@ (define %root-file-system-shepherd-service (provision '(root-file-system)) (start #~(const #t)) (stop #~(lambda _ - ;; Return #f if successfully stopped. + ;;; Return #f if successfully stopped. + + ;;; Beginning of inlined module (fuser) + + (use-modules (ice-9 textual-ports) + (ice-9 control) + (ice-9 string-fun) + (ice-9 match) + (ice-9 ftw) ; scandir + (srfi srfi-1) ; filter, for-each, find. + (srfi srfi-26) ; cut + (ice-9 exceptions)) ; guard + + (define PROC-DIR-NAME "/proc") + (define DEFAULT-SILENT-ERRORS + (list ENOENT ESRCH)) + + (define* (call-with-safe-syscall thunk + #:key + (on-error #f) + (silent-errors DEFAULT-SILENT-ERRORS) + (error-message-format #f) + (error-context '())) + "Call THUNK, handling system errors: +- If ERROR-MESSAGE-FORMAT and the error is not in SILENT-ERRORS, calls format +with ERROR-MESSAGE-FORMAT and ERROR-CONTEXT and (strerror errno) as arguments. +- Return ON-ERROR on error." + (catch 'system-error + thunk + (lambda args + (let ((errno (system-error-errno args))) + (unless (member errno silent-errors) + (when error-message-format + (apply format + (current-error-port) + error-message-format + (append + error-context + (list (strerror errno)))))) + on-error)))) + + (define (safe-stat path) + "Get stat info for PATH--or #f if not possible." + (call-with-safe-syscall (lambda () (stat path)) + #:error-message-format "Error: Cannot stat ~s: ~a~%" + #:error-context (list path) + #:silent-errors '() + #:on-error #f)) + + (define (safe-umount path) + "Umount PATH--if possible.." + (call-with-safe-syscall (lambda () (umount path)) + #:error-message-format "Error: Cannot umount ~s: ~a~%" + #:error-context (list path) + #:silent-errors '() + #:on-error 'error)) + + (define (safe-lstat path) + "Get lstat info for PATH--or #f if not possible." + (call-with-safe-syscall (lambda () (lstat path)) + #:error-message-format "Error: Cannot lstat ~s: ~a~%" + #:error-context (list path) + #:on-error #f)) + + (define (safe-scandir path) + "scandir PATH--or #f if not possible." + (let ((result (scandir path))) + (if result + result + (begin + (format (current-error-port) "Error: Cannot scandir ~s: ?~%" path) + '())))) + +;;; Processes + + (define (safe-get-fd-flags pid fd) + "Get flags for FD in PID--or #f if not possible." + (let ((fdinfo-path (format #f "~a/~a/fdinfo/~a" PROC-DIR-NAME pid fd))) + (call-with-safe-syscall (lambda () + (call-with-input-file fdinfo-path + (lambda (port) + ;; Find 'flags:' line and parse octal value + (let loop () + (let ((line (get-line port))) + (cond ((eof-object? line) #f) + ((string-prefix? "flags:\t" line) + (match (string-split line #\tab) + ((_ flags-str . _) + (catch 'invalid-argument + (lambda () + (string->number flags-str 8)) + (lambda args + #f))) + (_ #f))) + (else (loop)))))))) + #:error-message-format "Error: Cannot read ~s: ~a~%" + #:error-context (list fdinfo-path) + #:on-error #f))) + + (define (safe-get-processes) + "Get a list of all PIDs from proc--or #f if not possible." + (let ((proc-dir PROC-DIR-NAME)) + (catch 'system-error + (lambda () + ;; Keep only numbers. + (filter-map string->number (safe-scandir proc-dir))) + ;; FIXME is errno even useful? + (lambda scan-err + (format (current-error-port) "Error scanning ~s: ~a~%" + proc-dir (strerror (system-error-errno scan-err))) + '())))) + + (define (safe-fd-on-device? pid fd target-device) + "Return whether fd FD on pid PID is on device TARGET-DEVICE." + (let* ((fd-path (format #f "~a/~a/fd/~a" PROC-DIR-NAME pid fd)) + (link-stat (safe-lstat fd-path))) + (and link-stat (eqv? (stat:dev link-stat) + target-device)))) + + (define (safe-get-process-fds pid) + "Get a list of all FDs of PID from proc--or #f if not possible." + (let ((fd-dir (format #f "~a/~a/fd" PROC-DIR-NAME pid))) + ;; Keep only numbers. + + + (filter-map string->number (safe-scandir fd-dir)))) + + (define (filter-process-fd-flags pid fds predicate) + "Get FLAGS from proc for PID and call PREDICATE with (FD FLAGS) each." + (filter (lambda (fd) + (predicate fd (safe-get-fd-flags pid fd))) + fds)) + + (define (safe-get-process-command pid) + "Return command of process PID--or #f if not possible." + (let ((cmdline-path (format #f "~a/~a/cmdline" PROC-DIR-NAME pid))) + (call-with-safe-syscall (lambda () + (call-with-input-file cmdline-path + (lambda (port) + (let ((full-cmdline (get-string-all port))) + (match (string-split full-cmdline #\nul) + ((command-name . _) command-name)))))) + #:error-message-format "Error: Cannot read ~s: ~a~%" + #:error-context (list cmdline-path) + #:on-error #f))) + + (define (safe-kill-process pid kill-signal) + "Kill process PID with KILL-SIGNAL if possible." + (call-with-safe-syscall (lambda () + (kill pid kill-signal) + #t) + #:on-error 'error + #:silent-errors '() + #:error-message-format + "Error: Failed to kill process ~a: ~a~%" + #:error-context '())) + +;;; Mounts + + (define (safe-get-device mount-point) + "Get the device ID (st_dev) of MOUNT-POINT--or #f if not possible." + (and=> + (safe-stat mount-point) + stat:dev)) + + (define (safe-parse-mountinfo path) + "Read and parse /proc/self/mountinfo (or specified path). +Return a list of parsed entries, where each entry is: +(list mount-id parent-id mount-point-string) +Return '() on file read error or if file is unparseable." + (call-with-safe-syscall ; TODO: call-with-input-file is not actually a syscall. + (lambda () + (let ((entries '())) + (call-with-input-file path + (lambda (port) + (let loop () + (let ((line (get-line port))) + (unless (eof-object? line) + (match (string-split line #\space) + ;; mnt_id par_id major:minor root mount_point ... + ((m-id-str p-id-str _ _ mp . _) + ;; Attempt to parse IDs, skip line on error + (catch 'invalid-argument + (lambda () + (let ((mount-id (string->number m-id-str)) + (parent-id (string->number p-id-str))) + ;; Add successfully parsed entry to list + (set! entries (cons (list mount-id parent-id mp) + entries)) + (loop))) ; Continue to next line + (lambda args + (format (current-error-port) + "Warning: Skipping mountinfo line due to parse error: ~s (~a)~%" + line args) + (loop)))) + (_ (loop)))))))) + ;; Return parsed entries in file order + (reverse entries))) + #:error-message-format "Error: Cannot read or parse mountinfo file ~s: ~a" + #:error-context (list path) + #:on-error '())) + + (define (safe-find-nested-mounts root-mount-point target-device) + "Find mount points that block the unmounting of ROOT-MOUNT-POINT. +TARGET-DEVICE argument is ignored. +Mountpoints are returned depth-first (in the order they can be unmounted). +ROOT-MOUNT-POINT is included." + (let* ((mountinfo (safe-parse-mountinfo (format #f "~a/self/mountinfo" PROC-DIR-NAME)))) + (define (safe-find-mounts-via-mountinfo accumulator lives root-mount-point) + (if (member root-mount-point accumulator) + (format (current-error-port) "Cycle detected~%")) + (let ((accumulator (cons root-mount-point accumulator))) + (if (= lives 0) + (begin + (format (current-error-port) "Error: Recursive mountpoints too deep.~%") + accumulator) + (let ((root-entry (find (lambda (entry) + (match entry + ((_ _ mp) (string=? mp root-mount-point)) + (_ #f))) ; Should not happen + mountinfo))) + (if root-entry + (let ((root-mount-id (car root-entry))) + (fold (lambda (entry accumulator) + (match entry + ((_ parent-id mp) + (if (= parent-id root-mount-id) + (safe-find-mounts-via-mountinfo accumulator + (- lives 1) + mp) + accumulator)) + (_ accumulator))) + accumulator + mountinfo)) + (begin + (format (current-error-port) "Error: Could not find mount ID for ~s in parsed mountinfo~%" + root-mount-point) + accumulator)))))) + (safe-find-mounts-via-mountinfo '() 100 root-mount-point))) + + ;;; End of inlined module (fuser) + + (define MOUNT-POINT "/") + + (define O_ACCMODE #o0003) + + (define (flags-has-write-access? flags) + "Given open FLAGS, return whether it (probably) signifies write access." + (and flags (not (= (logand flags O_ACCMODE) + O_RDONLY)))) + + (define (ask-to-kill? pid command) + "Ask whether to kill process with id PID (and command COMMAND)" + (format (current-error-port) "~%Process Found: PID ~a Command: ~s~%" pid command) + (format (current-error-port) "Kill process ~a? [y/N] " pid) + (force-output (current-error-port)) + (let ((response (read-char (current-input-port)))) + (if (not (eof-object? response)) + ;; Consume rest of line. + (read-line (current-input-port))) + (or (eqv? response #\y) + (eqv? response #\Y)))) + + (define (clean-up . args) + (let* ((error-port (current-error-port)) + (root-device (safe-get-device MOUNT-POINT)) + (mounts (safe-find-nested-mounts MOUNT-POINT root-device)) + (mount-devices (map safe-get-device mounts))) + (format error-port "Searching for processes writing to files on devices ~s (mount points ~s)...~%" + mount-devices mounts) + (let* ((our-pid (getpid)) + (pids (filter (lambda (pid) + (not (= pid our-pid))) + (safe-get-processes))) + (pids (filter (lambda (pid) + (match (filter-process-fd-flags pid + (safe-get-process-fds pid) + (lambda (fd flags) + (and (flags-has-write-access? flags) + (find (lambda (target-device) + (safe-fd-on-device? pid fd target-device)) + mount-devices)))) + ((x . _) #t) + (_ #f))) + pids))) + (format error-port "Found ~a process(es) matching the criteria.~%" (length pids)) + (for-each (lambda (pid) + (let ((command (safe-get-process-command pid))) + (if (ask-to-kill? pid command) + (safe-kill-process pid SIGKILL) + (format error-port "Skipping PID ~a (~s).~%" pid command)))) + pids)) + (format error-port "~%Process scan complete.~%") + (format error-port "Searching for nested mounts of ~s...~%" MOUNT-POINT) + (if (null? mounts) + (format error-port "No nested mount points found.~%") + (begin + (format error-port "Found nested mount points that would need unmounting:~%") + (for-each (lambda (mp) + (format #t " ~s~%" mp) + (safe-umount mp)) + mounts))))) + + (define (call-with-mounted-filesystem source mountpoint filesystem-type options proc) + (mount source mountpoint file-system-type options #:update-mtab? #f) + (catch #t + (lambda () + (proc) + (umount mountpoint)) + (lambda args + (umount mountpoint)))) + (sync) - (let ((null (%make-void-port "w"))) + (let* ((null (%make-void-port "w")) + (call-with-io-file (lambda (file-name proc) + (let ((port (open file-name O_RDWR))) + (set-current-input-port port) + (set-current-output-port port) + (set-current-error-port port) + (catch #t proc + (lambda args + (set-current-input-port null) + (set-current-output-port null) + (set-current-error-port null) + (close port)))))) + (with-mounted-filesystem (syntax-rules () + ((with-mounted-filesystem source filesystem-type mountpoint options . exps) + (call-with-mounted-filesystem source filesystem-type mountpoint options + (lambda () . exps)))))) + ;; Redirect the default output ports. (set-current-output-port null) (set-current-error-port null) @@ -363,21 +690,47 @@ (define %root-file-system-shepherd-service ;; root file system can be re-mounted read-only. (let loop ((n 10)) (unless (catch 'system-error + (lambda () + (mount #f "/" #f + (logior MS_REMOUNT MS_RDONLY) + #:update-mtab? #f) + #t) + (const #f)) + (when (zero? n) + ;; TODO: pivot-root to /run/booted-system/initrd first--so we don't try to kill ourselves. + ;; But that's on /gnu/store--which we don't have anymore. + ;; Instead, we'll just exempt outselves (see "our-pid")--and possibly miss things. + (with-mounted-filesystem "none" "proc" "/proc" 0 + (with-mounted-filesystem "none" "devtmpfs" "/dev" 0 + (catch 'system-error (lambda () - (mount #f "/" #f - (logior MS_REMOUNT MS_RDONLY) - #:update-mtab? #f) - #t) + (mknod "/dev/tty" 'char-special #o600 (+ (* 5 256) 0))) (const #f)) - (unless (zero? n) - ;; Yield to the other fibers. That gives logging fibers - ;; an opportunity to close log files so the 'mount' call - ;; doesn't fail with EBUSY. - ((@ (fibers) sleep) 1) - (loop (- n 1))))) + (call-with-io-file "/dev/tty" + (lambda () + ;; we don't have chvt :( + ;; (it would need to use %ioctl fd VT_ACTIVATE int on /dev/tty) + ;(chvt 12) + (clean-up))))) + ;; Should have been unmounted already--but we are paranoid + ;; (and probably were blocking ourselves anyway). + (catch 'system-error + (lambda () + (mount #f "/" #f + (logior MS_REMOUNT MS_RDONLY) + #:update-mtab? #f) + #t) + (const #f)) + ((@ (fibers) sleep) 10)) + (unless (zero? n) + ;; Yield to the other fibers. That gives logging fibers + ;; an opportunity to close log files so the 'mount' call + ;; doesn't fail with EBUSY. + ((@ (fibers) sleep) 1) + (loop (- n 1))))) - #f))) - (respawn? #f))) + #f))) + (respawn? #f))) (define root-file-system-service-type (shepherd-service-type 'root-file-system base-commit: 85b5c2c8f66aed05730f6c7bdeabfaadf619bb8f prerequisite-patch-id: 1a4781dff5873451484bba21bc0dc4617075cb55 prerequisite-patch-id: bbe7274727aa8e1bf4beee1acafbd0a3fdc9257a -- 2.49.0
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.