GNU bug report logs - #78690
31.0.50; split string: args out of range with TRIM

Previous Next

Package: emacs;

Reported by: Michael Heerdegen <michael_heerdegen <at> web.de>

Date: Wed, 4 Jun 2025 02:35:02 UTC

Severity: normal

Found in version 31.0.50

Done: Eli Zaretskii <eliz <at> gnu.org>

Bug is archived. No further changes may be made.

Full log


View this message in rfc822 format

From: Eli Zaretskii <eliz <at> gnu.org>
To: Michael Heerdegen <michael_heerdegen <at> web.de>
Cc: 78690 <at> debbugs.gnu.org
Subject: bug#78690: 31.0.50; split string: args out of range with TRIM
Date: Thu, 12 Jun 2025 10:49:21 +0300
> From: Michael Heerdegen <michael_heerdegen <at> web.de>
> Cc: 78690 <at> debbugs.gnu.org
> Date: Mon, 09 Jun 2025 03:22:16 +0200
> 
> Eli Zaretskii <eliz <at> gnu.org> writes:
> 
> > The patch below seems to fix the problem, and passes all the tests.
> > Does it look reasonable?
> 
> Thank you very much.  I did not look at the details of your change yet;
> however, I see that a string starting with whitespace still makes the
> function error:
> 
> #+begin_src emacs-lisp
> (let ((text " -*- lexical-binding: t; -*-")
>       ;;    ^^^ see here
>       (seps "-\\*-")
>       (trim "[ \t\n\r-]+"))
>   (split-string text seps nil trim))
> #+end_src
> 
>   ~~> split-string: Args out of range: " -*- lexical-binding: t; -*-", 2, 1
> 
> Could you please have a look?

Thanks.  I guess I've found something similar independently, because
the changes I have stashed (reproduced below) don't signal an error in
this case.  With those changes, I get what I think is the expected
value:

  ("" "lexical-binding: t;" "")

Here's the up-to-date version of the patch:

diff --git a/lisp/subr.el b/lisp/subr.el
index 729f8b3..f674e51 100644
--- a/lisp/subr.el
+++ b/lisp/subr.el
@@ -5785,7 +5785,9 @@ split-string
 	 (start 0)
 	 this-start this-end
 	 notfirst
+         match-beg
 	 (list nil)
+         (strlen (length string))
 	 (push-one
 	  ;; Push the substring in range THIS-START to THIS-END
 	  ;; onto LIST, trimming it and perhaps discarding it.
@@ -5794,6 +5796,7 @@ split-string
 	      ;; Discard the trim from start of this substring.
 	      (let ((tem (string-match trim string this-start)))
 		(and (eq tem this-start)
+                     (<= (match-end 0) this-end)
 		     (setq this-start (match-end 0)))))
 
 	    (when (or keep-nulls (< this-start this-end))
@@ -5811,18 +5814,25 @@ split-string
 
     (while (and (string-match rexp string
 			      (if (and notfirst
-				       (= start (match-beginning 0))
-				       (< start (length string)))
+				       (= start match-beg) ; empty match
+				       (< start strlen))
 				  (1+ start) start))
-		(< start (length string)))
-      (setq notfirst t)
-      (setq this-start start this-end (match-beginning 0)
-	    start (match-end 0))
+		(< start strlen))
+      (setq notfirst t
+            match-beg (match-beginning 0))
+      ;; If the separator is right at the beginning, produce an empty
+      ;; substring in the result list.
+      (if (= start match-beg)
+          (setq this-start (match-end 0)
+                this-end this-start)
+        ;; Otherwise produce a substring from start to the separator.
+        (setq this-start start this-end match-beg))
+      (setq start (match-end 0))
 
       (funcall push-one))
 
     ;; Handle the substring at the end of STRING.
-    (setq this-start start this-end (length string))
+    (setq this-start start this-end strlen)
     (funcall push-one)
 
     (nreverse list)))
diff --git a/test/lisp/subr-tests.el b/test/lisp/subr-tests.el
index 024cbe8..2e8cbec 100644
--- a/test/lisp/subr-tests.el
+++ b/test/lisp/subr-tests.el
@@ -1505,5 +1505,14 @@ hash-table-contains-p
     (should (hash-table-contains-p 'cookie h))
     (should (hash-table-contains-p 'milk h))))
 
+(ert-deftest subr-test-split-string ()
+  (let ((text "-*- lexical-binding: t; -*-")
+        (seps "-\\*-")
+        (trim "[ \t\n\r-]+"))
+    (should (equal (split-string text seps nil trim)
+                   '("" "lexical-binding: t;" "")))
+    (should (equal (split-string text seps t trim)
+                   '("lexical-binding: t;")))))
+
 (provide 'subr-tests)
 ;;; subr-tests.el ends here




This bug report was last modified 21 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.