Package: guix-patches;
Reported by: Danny Milosavljevic <dannym <at> friendly-machines.com>
Date: Thu, 24 Apr 2025 23:04:02 UTC
Severity: normal
Message #20 received at 78051 <at> debbugs.gnu.org (full text, mbox):
From: Danny Milosavljevic <dannym <at> friendly-machines.com> To: 78051 <at> debbugs.gnu.org Cc: Danny Milosavljevic <dannym <at> friendly-machines.com> Subject: [WIP v6] services: root-file-system: In 'stop' method, find and kill processes that are writing to our filesystems, and then umount the filesystems. Date: Sat, 26 Apr 2025 12:05:24 +0200
* gnu/services/base.scm (%root-file-system-shepherd-service): In 'stop' method, find and kill processes that are writing to our filesystems, and then umount the filesystems. Change-Id: Ib0ffff2257dca5fff3df99fea2d5de81a9612336 --- gnu/services/base.scm | 2860 +++++++++++++++++++++++------------------ 1 file changed, 1627 insertions(+), 1233 deletions(-) diff --git a/gnu/services/base.scm b/gnu/services/base.scm index 8c6563c..22168a3 100644 --- a/gnu/services/base.scm +++ b/gnu/services/base.scm @@ -61,15 +61,15 @@ (define-module (gnu services base) #:use-module (gnu packages admin) #:use-module ((gnu packages linux) #:select (alsa-utils btrfs-progs crda eudev - e2fsprogs f2fs-tools fuse gpm kbd lvm2 rng-tools - util-linux xfsprogs)) + e2fsprogs f2fs-tools fuse gpm kbd lvm2 rng-tools + util-linux xfsprogs)) #:use-module (gnu packages bash) #:use-module ((gnu packages base) #:select (coreutils glibc glibc/hurd - glibc-utf8-locales - libc-utf8-locales-for-target - make-glibc-utf8-locales - tar canonical-package)) + glibc-utf8-locales + libc-utf8-locales-for-target + make-glibc-utf8-locales + tar canonical-package)) #:use-module ((gnu packages cross-base) #:select (cross-libc)) #:use-module ((gnu packages compression) #:select (gzip)) @@ -346,12 +346,373 @@ (define %root-file-system-shepherd-service (shepherd-service (documentation "Take care of the root file system.") (provision '(root-file-system)) + ;; Is it possible to have (gnu build linux-boot) loaded already? + ;; In that case, I'd like to move a lot of stuff there. + (modules '((ice-9 textual-ports) + (ice-9 control) + (ice-9 string-fun) + (ice-9 match) + (ice-9 ftw) ; scandir + (ice-9 rdelim) + (srfi srfi-1) ; filter, for-each, find. + (srfi srfi-26) ; cut + (ice-9 exceptions))) ; guard + ; TODO (guix build syscalls) (start #~(const #t)) (stop #~(lambda _ - ;; Return #f if successfully stopped. + ;;; Return #f if successfully stopped. + + ;;; Beginning of inlined module (fuser) + + (define log (make-parameter (lambda args + (apply format (current-error-port) args)))) + (define *proc-dir-name* "/proc") + (define *default-silent-errors* + (list ENOENT ESRCH)) + + (define* (call-with-safe-syscall thunk + #:key + (on-error #f) + (silent-errors *default-silent-errors*) + (error-message-format #f) + (error-context '())) + "Call THUNK, handling system errors: +- If ERROR-MESSAGE-FORMAT and the error is not in SILENT-ERRORS, calls format +with ERROR-MESSAGE-FORMAT and ERROR-CONTEXT and (strerror errno) as arguments. +- Return ON-ERROR on error." + (catch 'system-error + thunk + (lambda args + (let ((errno (system-error-errno args))) + (unless (member errno silent-errors) + (when error-message-format + (apply (log) + error-message-format + (append + error-context + (list (strerror errno)))))) + on-error)))) + + (define (safe-stat path) + "Get stat info for PATH--or #f if not possible." + (call-with-safe-syscall (lambda () (stat path)) + #:error-message-format "Error: Cannot stat ~s: ~a~%" + #:error-context (list path) + #:silent-errors '() + #:on-error #f)) + + (define (safe-umount path) ; TODO: UMOUNT_NOFOLLOW ? + "Umount PATH--if possible.." + (call-with-safe-syscall (lambda () (umount path)) + #:error-message-format "Error: Cannot umount ~s: ~a~%" + #:error-context (list path) + #:silent-errors '() + #:on-error 'error)) + + (define (safe-lstat path) + "Get lstat info for PATH--or #f if not possible." + (call-with-safe-syscall (lambda () (lstat path)) + #:error-message-format "Error: Cannot lstat ~s: ~a~%" + #:error-context (list path) + #:on-error #f)) + + (define (safe-scandir path) + "scandir PATH--or #f if not possible." + (let ((result (scandir path))) + (if result + result + (begin + ((log) "Error: Cannot scandir ~s: ?~%" path) + '())))) + +;;; Processes + + (define (safe-get-fd-flags pid fd) + "Get flags for FD in PID--or #f if not possible." + (let ((fdinfo-path (format #f "~a/~a/fdinfo/~a" *proc-dir-name* pid fd))) + (call-with-safe-syscall (lambda () + (call-with-input-file fdinfo-path + (lambda (port) + ;; Find 'flags:' line and parse octal value + (let loop () + (let ((line (get-line port))) + (cond ((eof-object? line) #f) + ((string-prefix? "flags:\t" line) + (match (string-split line #\tab) + ((_ flags-str) + (catch 'invalid-argument + (lambda () + (string->number flags-str 8)) + (lambda args + #f))) + (_ #f))) + (else (loop)))))))) + #:error-message-format "Error: Cannot read ~s: ~a~%" + #:error-context (list fdinfo-path) + #:on-error #f))) + + (define (safe-get-processes) + "Get a list of all PIDs from proc--or #f if not possible." + (let ((proc-dir *proc-dir-name*)) + (catch 'system-error + (lambda () + ;; Keep only numbers. + (filter-map string->number (safe-scandir proc-dir))) + ;; FIXME is errno even useful? + (lambda scan-err + ((log) "Error scanning ~s: ~a~%" + proc-dir (strerror (system-error-errno scan-err))) + '())))) + + (define (safe-fd-on-device? pid fd target-device) + "Return whether fd FD on pid PID is on device TARGET-DEVICE." + (let* ((fd-path (readlink (format #f "~a/~a/fd/~a" *proc-dir-name* pid fd))) + (stat (safe-lstat fd-path))) + (and stat (eqv? (stat:dev stat) + target-device)))) + + (define (safe-get-process-fds pid) + "Get a list of all FDs of PID from proc--or #f if not possible." + (let ((fd-dir (format #f "~a/~a/fd" *proc-dir-name* pid))) + ;; Keep only numbers. + (filter-map string->number (safe-scandir fd-dir)))) + + (define (filter-process-fd-flags pid fds predicate) + "Get FLAGS from proc for PID and call PREDICATE with (FD FLAGS) each." + (filter (lambda (fd) + (predicate fd (safe-get-fd-flags pid fd))) + fds)) + + (define (safe-get-process-command pid) + "Return command of process PID--or #f if not possible." + (let ((cmdline-path (format #f "~a/~a/cmdline" *proc-dir-name* pid))) + (call-with-safe-syscall (lambda () + (call-with-input-file cmdline-path + (lambda (port) + (let ((full-cmdline (get-string-all port))) + (match (string-split full-cmdline #\nul) + ((command-name . _) command-name)))))) + #:error-message-format "Error: Cannot read ~s: ~a~%" + #:error-context (list cmdline-path) + #:on-error #f))) + + (define (safe-kill-process pid kill-signal) + "Kill process PID with KILL-SIGNAL if possible." + (call-with-safe-syscall (lambda () + (kill pid kill-signal) + #t) + #:on-error 'error + #:silent-errors '() + #:error-message-format + "Error: Failed to kill process ~a: ~a~%" + #:error-context '())) + +;;; Mounts + + (define (safe-get-device mount-point) + "Get the device ID (st_dev) of MOUNT-POINT--or #f if not possible." + (and=> + (safe-stat mount-point) ; TODO: lstat? Is that safe? + stat:dev)) + + (define (safe-parse-mountinfo path) + "Read and parse /proc/self/mountinfo (or specified path). +Return a list of parsed entries, where each entry is: +(list mount-id parent-id mount-point-string) +Return '() on file read error or if file is unparseable." + (call-with-safe-syscall ; TODO: call-with-input-file is not actually a syscall. + (lambda () + (let ((entries '())) + (call-with-input-file path + (lambda (port) + (let loop () + (let ((line (get-line port))) + (unless (eof-object? line) + (match (string-split line #\space) + ((mount-id-str parent-id-str major-minor root mount-point rest ...) + ;; Attempt to parse IDs, skip line on error + (catch 'invalid-argument + (lambda () + (let ((mount-id (string->number mount-id-str)) + (parent-id (string->number parent-id-str))) + ;; Add successfully parsed entry to list + (set! entries (cons (list mount-id parent-id mount-point) + entries)) + (loop))) + (lambda args + ((log) + "Warning: Skipping mountinfo line due to parse error: ~s (~a)~%" + line args) + (loop)))) + (x (begin + ((log) "Warning: Skipping mountinfo line: %s" x) + (loop))))))))) + ;; Return parsed entries in file order + (reverse entries))) + #:error-message-format "Error: Cannot read or parse mountinfo file ~s: ~a" + #:error-context (list path) + #:on-error '(error))) + + (define (safe-find-nested-mounts root-mount-point target-device) + "Find mount points that block the unmounting of ROOT-MOUNT-POINT. +TARGET-DEVICE argument is ignored. +Mountpoints are returned depth-first (in the order they can be unmounted). +ROOT-MOUNT-POINT is included." + (let* ((mountinfo (safe-parse-mountinfo (format #f "~a/self/mountinfo" *proc-dir-name*)))) + (define (safe-find-mounts-via-mountinfo accumulator lives root-mount-point) + (if (member root-mount-point accumulator) + ((log) "Cycle detected~%")) + (let ((accumulator (cons root-mount-point accumulator))) + (if (= lives 0) + (begin + ((log) "Error: Recursive mountpoints too deep.~%") + accumulator) + (let ((root-entry (find (lambda (entry) + (match entry + ((_ _ mp) (string=? mp root-mount-point)) + (_ #f))) ; Should not happen + mountinfo))) + (if root-entry + (let ((root-mount-id (car root-entry))) + (fold (lambda (entry accumulator) + (match entry + ((_ parent-id mp) + (if (= parent-id root-mount-id) + (safe-find-mounts-via-mountinfo accumulator + (- lives 1) + mp) + accumulator)) + (_ accumulator))) + accumulator + mountinfo)) + (begin + ((log) "Error: Could not find mount ID for ~s in parsed mountinfo~%" + root-mount-point) + accumulator)))))) + (safe-find-mounts-via-mountinfo '() 100 root-mount-point))) + + ;;; End of inlined module (fuser) + + (define *root-mount-point* "/") + + (define O_ACCMODE #o0003) + + (define (flags-has-write-access? flags) + "Given open FLAGS, return whether it (probably) signifies write access." + (and flags (not (= (logand flags O_ACCMODE) + O_RDONLY)))) + + (define (kill-process? pid command) + "Return whether to kill process with id PID (and command COMMAND)" + ((log) "~%Process Found: PID ~a Command: ~s~%" pid command) + #t + ;((log) "Kill process ~a? [y/N] " pid) + ;(force-output (current-error-port)) + ;(let ((response (read-char (current-input-port)))) + ; (if (not (eof-object? response)) + ; ;; Consume rest of line. + ; (read-line (current-input-port))) + ; (or (eqv? response #\y) + ; (eqv? response #\Y))) + ) + (sync) - (let ((null (%make-void-port "w"))) + (let* ((null (%make-void-port "w")) + (call-with-io-file (lambda (file-name proc) + (let ((port (open file-name O_RDWR))) + (set-current-input-port port) + (set-current-output-port port) + (set-current-error-port port) + (catch #t (lambda () + (proc) + (set-current-input-port null) + (set-current-output-port null) + (set-current-error-port null) + (close port)) + (lambda args + (set-current-input-port null) + (set-current-output-port null) + (set-current-error-port null) + (close port))))))) + (let-syntax ((with-mounted-filesystem (syntax-rules () + ((_ source mountpoint file-system-type flags options exp ...) + (call-with-mounted-filesystem source mountpoint file-system-type flags options + (lambda () (begin exp ...))))))) + + (define (call-with-logging thunk) + (mkdir-p "/proc") + (mkdir-p "/dev") + (with-mounted-filesystem "none" "/proc" "proc" 0 #f ; TODO: MS_NODEV, MS_NOEXEC, MS_NOSUID + (with-mounted-filesystem "none" "/dev" "devtmpfs" 0 #f ; TODO: MS_NOEXEC, MS_NOSUID + (catch 'system-error + (lambda () + (mknod "/dev/console" 'char-special #o600 (+ (* 5 256) 1))) + (const #f)) + (catch 'system-error + (lambda () + (mknod "/dev/tty0" 'char-special #o600 (+ (* 4 256) 0))) + (const #f)) + (call-with-io-file "/dev/console" ; TODO: /dev/console after we set it up using vt-set-as-console at boot or something (see plymouth). + (lambda () + ;(vt-activate (current-input-port) 12) + (thunk)))))) + + (define (get-clean-ups) + ;; We rarely (or ever) log--and if we did have a logger + ;; at all times, we'd show up on our own shitlist. + ;; So: open logger, log, close logger--on every message. + (parameterize ((log (lambda args + (call-with-logging + (lambda () + (format (current-error-port) args)))))) + (let* ((root-device (safe-get-device *root-mount-point*)) + (mounts (safe-find-nested-mounts *root-mount-point* root-device)) + (mount-devices (map safe-get-device mounts))) + (let* ((our-pid (getpid)) + (pids (filter (lambda (pid) + (not (= pid our-pid))) + (safe-get-processes))) + (pids (filter (lambda (pid) + (match (filter-process-fd-flags pid + (safe-get-process-fds pid) + (lambda (fd flags) + (and (flags-has-write-access? flags) + (find (lambda (target-device) + (safe-fd-on-device? pid fd target-device)) + mount-devices)))) + ((x . _) #t) + (_ #f))) + pids))) + (list pids mounts mount-devices))))) + + (define (call-with-mounted-filesystem source mountpoint file-system-type flags options proc) + (mount source mountpoint file-system-type flags options #:update-mtab? #f) + (catch #t + (lambda () + (proc) + (umount mountpoint)) + (lambda args + (umount mountpoint)))) + + ;; This will also take care of setting up a logger for the + ;; entire runtime of the function. + (define (kill-processes pids mounts mount-devices signal) + (call-with-logging + (lambda () + (parameterize ((log (lambda args + (apply format (current-error-port) args)))) + (let ((error-port (current-error-port))) + (format error-port "Searched for processes writing on devices ~s (mount points ~s)...~%" mount-devices mounts) + (format error-port "Found ~a process(es) matching the criteria.~%" (length pids)) + (for-each (lambda (pid) + (let ((command (safe-get-process-command pid))) + (if (kill-process? pid command) + (safe-kill-process pid signal) + (format error-port "Skipping PID ~a (~s).~%" pid command)))) + pids) + (format error-port "~%Process scan complete.~%")))))) + ;; Redirect the default output ports. (set-current-output-port null) (set-current-error-port null) @@ -363,12 +724,45 @@ (define %root-file-system-shepherd-service ;; root file system can be re-mounted read-only. (let loop ((n 10)) (unless (catch 'system-error - (lambda () - (mount #f "/" #f - (logior MS_REMOUNT MS_RDONLY) - #:update-mtab? #f) - #t) - (const #f)) + (lambda () + (mount #f "/" #f + (logior MS_REMOUNT MS_RDONLY) + #:update-mtab? #f) + #t) + (const #f)) + (when (zero? n) + ;; 1. Send SIGTERM to all writing processes (if any) + (match (get-clean-ups) + ((pids mounts mount-devices) + (when (> (length pids) 0) + (kill-processes pids mounts mount-devices SIGTERM) + ((@ (fibers) sleep) 5)))) + + ;; 2. Send SIGKILL to all writing processes + (match (get-clean-ups) + ((pids mounts mount-devices) + (when (> (length pids) 0) + (kill-processes pids mounts mount-devices SIGKILL) + ((@ (fibers) sleep) 5)) + + ;; 3. Unmount filesystems + (for-each safe-umount mounts))) + + ;; Should have been unmounted already--but we are paranoid + ;; (and possibly were blocking ourselves anyway). + (catch 'system-error + (lambda () + (mount #f "/" #f + (logior MS_REMOUNT MS_RDONLY) + #:update-mtab? #f) + ((@ (fibers) sleep) 5) ; just in case + #t) + (lambda args + ((log) "failed to remount / ro %s %~" args) + (let loopity ((q 0)) + ((log) "user, do something!~%") + ((@ (fibers) sleep) 1) + (loopity (+ q 1))))))) (unless (zero? n) ;; Yield to the other fibers. That gives logging fibers ;; an opportunity to close log files so the 'mount' call
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.