GNU bug report logs - #75581
30.0.93; Crash when copy-sequence on clear-string'ed multibyte strings with text properties

Previous Next

Package: emacs;

Reported by: Kanakana <gudzpoz <at> gmail.com>

Date: Wed, 15 Jan 2025 14:37:02 UTC

Severity: normal

Found in version 30.0.93

Fixed in version 31.1

Done: Stefan Kangas <stefankangas <at> gmail.com>

Bug is archived. No further changes may be made.

Full log


Message #37 received at 75581 <at> debbugs.gnu.org (full text, mbox):

From: Pip Cet <pipcet <at> protonmail.com>
To: Stefan Kangas <stefankangas <at> gmail.com>
Cc: 75581 <at> debbugs.gnu.org, Kanakana <gudzpoz <at> gmail.com>,
 Andreas Schwab <schwab <at> linux-m68k.org>
Subject: Re: bug#75581: 30.0.93;
 Crash when copy-sequence on clear-string'ed multibyte strings with
 text properties
Date: Thu, 16 Jan 2025 00:03:10 +0000
Pip Cet <pipcet <at> protonmail.com> writes:

> "Stefan Kangas" <stefankangas <at> gmail.com> writes:
>
>> Pip Cet <pipcet <at> protonmail.com> writes:
>>
>>> Looks good to me.  However, on the scratch/no-purespace branch, we need
>>> to ensure that this function cannot be called on the empty multibyte
>>> string, turning it into a second empty unibyte string!  I'll push a fix
>>> to that branch in a bit.
>>
>> Please do, thanks.
>
> No need, I had forgotten the strange way STRING_SET_MULTIBYTE treats its
> argument as an lvalue.  It's a minor issue, but I would prefer it if the
> argument were provided as a pointer to a Lisp_Object so it's clear it
> can be modified.

Patch (I've verified that the eassume-in-a-loop thing doesn't generate
any code with current GCC, but it'd be nice to check this for clang):

STRING_SET_UNIBYTE (str[i++]) probably wasn't worth worrying about, but
STRING_SET_UNIBYTE (empty_multibyte_string) seemed like a plausible
mistake to me, and would have had dire results.  Anyway, changing the
API ensures we have a good look at all callers, and everything appears
to be in order (except that I think asetting a single-character string
should always work; right now, sometimes (aset str 0 #x100) will fail to
work when (progn (aset str 0 0) (aset str 0 #x100)) would work, based on
the character supposedly overwritten.  IOW, we can't implement
dead-store elimination for aset because an apparent "dead" store can
have an effect).

commit 9df159043cff1af82848e3a232c9e6cf2ecf5992 (HEAD)
Author: Pip Cet <pipcet <at> protonmail.com>
Date:   Wed Jan 15 23:15:19 2025 +0000

    Align STRING_SET_UNIBYTE and STRING_SET_MULTIBYTE (bug#75581)
    
    Calling convention is now that both identifiers refer to
    ordinary (inline) functions receiving a pointer to a Lisp_Object,
    which is modified to be the unique appropriate empty string if
    necessary.
    
    * src/alloc.c (make_multibyte_string): Drop 'register' hint.
    (make_string_from_bytes): Drop 'register' hint; pass a pointer to the
    Lisp_Object which might be modified.
    (make_specified_string):
    (make_clear_string):
    modified.
    * src/print.c (Fprin1_to_string):
    * src/fns.c (Fdelete):
    (Fclear_string): Pass a pointer to the Lisp_Object which might be
    * src/data.c (Faset): Also drop 'register' hint.
    * src/lisp.h (STRING_SET_UNIBYTE, STRING_SET_MULTIBYTE): Move.  Turn
    both into functions with the same calling convention.  Assert that no
    invalid multibyte string is created in 'STRING_SET_MULTIBYTE'.

diff --git a/src/alloc.c b/src/alloc.c
index 8307c74c106..1e9d3bfd69f 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -2510,7 +2510,7 @@ make_unibyte_string (const char *contents, ptrdiff_t length)
 make_multibyte_string (const char *contents,
 		       ptrdiff_t nchars, ptrdiff_t nbytes)
 {
-  register Lisp_Object val;
+  Lisp_Object val;
   val = make_uninit_multibyte_string (nchars, nbytes);
   memcpy (SDATA (val), contents, nbytes);
   return val;
@@ -2524,11 +2524,11 @@ make_multibyte_string (const char *contents,
 make_string_from_bytes (const char *contents,
 			ptrdiff_t nchars, ptrdiff_t nbytes)
 {
-  register Lisp_Object val;
+  Lisp_Object val;
   val = make_uninit_multibyte_string (nchars, nbytes);
   memcpy (SDATA (val), contents, nbytes);
   if (SBYTES (val) == SCHARS (val))
-    STRING_SET_UNIBYTE (val);
+    STRING_SET_UNIBYTE (&val);
   return val;
 }
 
@@ -2555,7 +2555,7 @@ make_specified_string (const char *contents,
   val = make_uninit_multibyte_string (nchars, nbytes);
   memcpy (SDATA (val), contents, nbytes);
   if (!multibyte)
-    STRING_SET_UNIBYTE (val);
+    STRING_SET_UNIBYTE (&val);
   return val;
 }
 
@@ -2572,7 +2572,7 @@ make_clear_string (EMACS_INT length, bool clearit)
   if (!length)
     return empty_unibyte_string;
   val = make_clear_multibyte_string (length, length, clearit);
-  STRING_SET_UNIBYTE (val);
+  STRING_SET_UNIBYTE (&val);
   return val;
 }
 
diff --git a/src/data.c b/src/data.c
index be85f817014..deaa8c502d4 100644
--- a/src/data.c
+++ b/src/data.c
@@ -2584,9 +2584,9 @@ DEFUN ("aset", Faset, Saset, 3, 3, 0,
        doc: /* Store into the element of ARRAY at index IDX the value NEWELT.
 Return NEWELT.  ARRAY may be a vector, a string, a char-table or a
 bool-vector.  IDX starts at 0.  */)
-  (register Lisp_Object array, Lisp_Object idx, Lisp_Object newelt)
+  (Lisp_Object array, Lisp_Object idx, Lisp_Object newelt)
 {
-  register EMACS_INT idxval;
+  EMACS_INT idxval;
 
   CHECK_FIXNUM (idx);
   idxval = XFIXNUM (idx);
@@ -2646,7 +2646,7 @@ DEFUN ("aset", Faset, Saset, 3, 3, 0,
 	    if (!ASCII_CHAR_P (SREF (array, i)))
 	      args_out_of_range (array, newelt);
 	  /* ARRAY is an ASCII string.  Convert it to a multibyte string.  */
-	  STRING_SET_MULTIBYTE (array);
+	  STRING_SET_MULTIBYTE (&array);
 	  idxval_byte = idxval;
 	  p1 = SDATA (array) + idxval_byte;
 	  prev_bytes = 1;
diff --git a/src/fns.c b/src/fns.c
index 191154c651a..edde21469cb 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -2203,7 +2203,7 @@ DEFUN ("delete", Fdelete, Sdelete, 2, 2, 0,
 
 	  tem = make_uninit_multibyte_string (nchars, nbytes);
 	  if (!STRING_MULTIBYTE (seq))
-	    STRING_SET_UNIBYTE (tem);
+	    STRING_SET_UNIBYTE (&tem);
 
 	  for (i = nchars = nbytes = ibyte = 0;
 	       i < SCHARS (seq);
@@ -3320,7 +3320,7 @@ DEFUN ("clear-string", Fclear_string, Sclear_string,
       CHECK_IMPURE (string, XSTRING (string));
       memset (SDATA (string), 0, len);
       STRING_SET_CHARS (string, len);
-      STRING_SET_UNIBYTE (string);
+      STRING_SET_UNIBYTE (&string);
     }
   return Qnil;
 }
diff --git a/src/lisp.h b/src/lisp.h
index a8fe2e9f6bc..31c087eda73 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -1654,25 +1654,6 @@ STRING_MULTIBYTE (Lisp_Object str)
 #define STRING_BYTES_BOUND  \
   ((ptrdiff_t) min (MOST_POSITIVE_FIXNUM, min (SIZE_MAX, PTRDIFF_MAX) - 1))
 
-/* Mark STR as a unibyte string.  */
-#define STRING_SET_UNIBYTE(STR)				\
-  do {							\
-    if (XSTRING (STR)->u.s.size == 0)			\
-      (STR) = empty_unibyte_string;			\
-    else						\
-      XSTRING (STR)->u.s.size_byte = -1;		\
-  } while (false)
-
-/* Mark STR as a multibyte string.  Assure that STR contains only
-   ASCII characters in advance.  */
-INLINE void
-STRING_SET_MULTIBYTE (Lisp_Object str)
-{
-  /* The 0-length strings are unique&shared so we can't modify them.  */
-  eassert (XSTRING (str)->u.s.size > 0);
-  XSTRING (str)->u.s.size_byte = XSTRING (str)->u.s.size;
-}
-
 /* Convenience functions for dealing with Lisp strings.  */
 
 /* WARNING: Use the 'char *' pointers to string data with care in code
@@ -4605,6 +4586,7 @@ list4i (intmax_t a, intmax_t b, intmax_t c, intmax_t d)
 extern Lisp_Object make_formatted_string (char *, const char *, ...)
   ATTRIBUTE_FORMAT_PRINTF (2, 3);
 extern Lisp_Object make_unibyte_string (const char *, ptrdiff_t);
+
 extern ptrdiff_t vectorlike_nbytes (const union vectorlike_header *hdr);
 
 INLINE ptrdiff_t
@@ -5242,6 +5224,32 @@ fast_c_string_match_ignore_case (Lisp_Object regexp,
 #endif
 extern Lisp_Object decode_env_path (const char *, const char *, bool);
 extern Lisp_Object empty_unibyte_string, empty_multibyte_string;
+
+/* Mark *STRPTR as a unibyte string.  Modify *STRPTR if necessary.  */
+INLINE void
+STRING_SET_UNIBYTE (Lisp_Object *strptr)
+{
+  eassume (STRING_MULTIBYTE (*strptr));
+  if (XSTRING (*strptr)->u.s.size == 0)
+    *strptr = empty_unibyte_string;
+  else
+    XSTRING (*strptr)->u.s.size_byte = -1;
+}
+
+/* Mark *STRPTR as a multibyte string.  Assumes that *STRPTR contains
+   only ASCII characters.  Modify *STRPTR if necessary.  */
+INLINE void
+STRING_SET_MULTIBYTE (Lisp_Object *strptr)
+{
+  eassume (!STRING_MULTIBYTE (*strptr));
+  for (ptrdiff_t i = 0; i < SBYTES (*strptr); i++)
+    eassume (ASCII_CHAR_P (SDATA (*strptr)[i]));
+  if (XSTRING (*strptr)->u.s.size == 0)
+    *strptr = empty_multibyte_string;
+  else
+    XSTRING (*strptr)->u.s.size_byte = XSTRING (*strptr)->u.s.size;
+}
+
 extern AVOID terminate_due_to_signal (int, int);
 #ifdef WINDOWSNT
 extern Lisp_Object Vlibrary_cache;
diff --git a/src/print.c b/src/print.c
index f3814859cb3..62c7780416a 100644
--- a/src/print.c
+++ b/src/print.c
@@ -819,7 +819,7 @@ DEFUN ("prin1-to-string", Fprin1_to_string, Sprin1_to_string, 1, 3, 0,
   set_buffer_internal (XBUFFER (Vprin1_to_string_buffer));
   object = Fbuffer_string ();
   if (SBYTES (object) == SCHARS (object))
-    STRING_SET_UNIBYTE (object);
+    STRING_SET_UNIBYTE (&object);
 
   /* Note that this won't make prepare_to_modify_buffer call
      ask-user-about-supersession-threat because this buffer

(I'd also like to propose that (aset str 0 x) always should work on
single-character strings; right now, if str contains a single unibyte
non-ASCII string and x is >= 0x100, the aset will fail, which seems
weird to me).

Pip





This bug report was last modified 184 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.