Package: emacs;
Reported by: Kanakana <gudzpoz <at> gmail.com>
Date: Wed, 15 Jan 2025 14:37:02 UTC
Severity: normal
Found in version 30.0.93
Fixed in version 31.1
Done: Stefan Kangas <stefankangas <at> gmail.com>
Bug is archived. No further changes may be made.
Message #37 received at 75581 <at> debbugs.gnu.org (full text, mbox):
From: Pip Cet <pipcet <at> protonmail.com> To: Stefan Kangas <stefankangas <at> gmail.com> Cc: 75581 <at> debbugs.gnu.org, Kanakana <gudzpoz <at> gmail.com>, Andreas Schwab <schwab <at> linux-m68k.org> Subject: Re: bug#75581: 30.0.93; Crash when copy-sequence on clear-string'ed multibyte strings with text properties Date: Thu, 16 Jan 2025 00:03:10 +0000
Pip Cet <pipcet <at> protonmail.com> writes: > "Stefan Kangas" <stefankangas <at> gmail.com> writes: > >> Pip Cet <pipcet <at> protonmail.com> writes: >> >>> Looks good to me. However, on the scratch/no-purespace branch, we need >>> to ensure that this function cannot be called on the empty multibyte >>> string, turning it into a second empty unibyte string! I'll push a fix >>> to that branch in a bit. >> >> Please do, thanks. > > No need, I had forgotten the strange way STRING_SET_MULTIBYTE treats its > argument as an lvalue. It's a minor issue, but I would prefer it if the > argument were provided as a pointer to a Lisp_Object so it's clear it > can be modified. Patch (I've verified that the eassume-in-a-loop thing doesn't generate any code with current GCC, but it'd be nice to check this for clang): STRING_SET_UNIBYTE (str[i++]) probably wasn't worth worrying about, but STRING_SET_UNIBYTE (empty_multibyte_string) seemed like a plausible mistake to me, and would have had dire results. Anyway, changing the API ensures we have a good look at all callers, and everything appears to be in order (except that I think asetting a single-character string should always work; right now, sometimes (aset str 0 #x100) will fail to work when (progn (aset str 0 0) (aset str 0 #x100)) would work, based on the character supposedly overwritten. IOW, we can't implement dead-store elimination for aset because an apparent "dead" store can have an effect). commit 9df159043cff1af82848e3a232c9e6cf2ecf5992 (HEAD) Author: Pip Cet <pipcet <at> protonmail.com> Date: Wed Jan 15 23:15:19 2025 +0000 Align STRING_SET_UNIBYTE and STRING_SET_MULTIBYTE (bug#75581) Calling convention is now that both identifiers refer to ordinary (inline) functions receiving a pointer to a Lisp_Object, which is modified to be the unique appropriate empty string if necessary. * src/alloc.c (make_multibyte_string): Drop 'register' hint. (make_string_from_bytes): Drop 'register' hint; pass a pointer to the Lisp_Object which might be modified. (make_specified_string): (make_clear_string): modified. * src/print.c (Fprin1_to_string): * src/fns.c (Fdelete): (Fclear_string): Pass a pointer to the Lisp_Object which might be * src/data.c (Faset): Also drop 'register' hint. * src/lisp.h (STRING_SET_UNIBYTE, STRING_SET_MULTIBYTE): Move. Turn both into functions with the same calling convention. Assert that no invalid multibyte string is created in 'STRING_SET_MULTIBYTE'. diff --git a/src/alloc.c b/src/alloc.c index 8307c74c106..1e9d3bfd69f 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -2510,7 +2510,7 @@ make_unibyte_string (const char *contents, ptrdiff_t length) make_multibyte_string (const char *contents, ptrdiff_t nchars, ptrdiff_t nbytes) { - register Lisp_Object val; + Lisp_Object val; val = make_uninit_multibyte_string (nchars, nbytes); memcpy (SDATA (val), contents, nbytes); return val; @@ -2524,11 +2524,11 @@ make_multibyte_string (const char *contents, make_string_from_bytes (const char *contents, ptrdiff_t nchars, ptrdiff_t nbytes) { - register Lisp_Object val; + Lisp_Object val; val = make_uninit_multibyte_string (nchars, nbytes); memcpy (SDATA (val), contents, nbytes); if (SBYTES (val) == SCHARS (val)) - STRING_SET_UNIBYTE (val); + STRING_SET_UNIBYTE (&val); return val; } @@ -2555,7 +2555,7 @@ make_specified_string (const char *contents, val = make_uninit_multibyte_string (nchars, nbytes); memcpy (SDATA (val), contents, nbytes); if (!multibyte) - STRING_SET_UNIBYTE (val); + STRING_SET_UNIBYTE (&val); return val; } @@ -2572,7 +2572,7 @@ make_clear_string (EMACS_INT length, bool clearit) if (!length) return empty_unibyte_string; val = make_clear_multibyte_string (length, length, clearit); - STRING_SET_UNIBYTE (val); + STRING_SET_UNIBYTE (&val); return val; } diff --git a/src/data.c b/src/data.c index be85f817014..deaa8c502d4 100644 --- a/src/data.c +++ b/src/data.c @@ -2584,9 +2584,9 @@ DEFUN ("aset", Faset, Saset, 3, 3, 0, doc: /* Store into the element of ARRAY at index IDX the value NEWELT. Return NEWELT. ARRAY may be a vector, a string, a char-table or a bool-vector. IDX starts at 0. */) - (register Lisp_Object array, Lisp_Object idx, Lisp_Object newelt) + (Lisp_Object array, Lisp_Object idx, Lisp_Object newelt) { - register EMACS_INT idxval; + EMACS_INT idxval; CHECK_FIXNUM (idx); idxval = XFIXNUM (idx); @@ -2646,7 +2646,7 @@ DEFUN ("aset", Faset, Saset, 3, 3, 0, if (!ASCII_CHAR_P (SREF (array, i))) args_out_of_range (array, newelt); /* ARRAY is an ASCII string. Convert it to a multibyte string. */ - STRING_SET_MULTIBYTE (array); + STRING_SET_MULTIBYTE (&array); idxval_byte = idxval; p1 = SDATA (array) + idxval_byte; prev_bytes = 1; diff --git a/src/fns.c b/src/fns.c index 191154c651a..edde21469cb 100644 --- a/src/fns.c +++ b/src/fns.c @@ -2203,7 +2203,7 @@ DEFUN ("delete", Fdelete, Sdelete, 2, 2, 0, tem = make_uninit_multibyte_string (nchars, nbytes); if (!STRING_MULTIBYTE (seq)) - STRING_SET_UNIBYTE (tem); + STRING_SET_UNIBYTE (&tem); for (i = nchars = nbytes = ibyte = 0; i < SCHARS (seq); @@ -3320,7 +3320,7 @@ DEFUN ("clear-string", Fclear_string, Sclear_string, CHECK_IMPURE (string, XSTRING (string)); memset (SDATA (string), 0, len); STRING_SET_CHARS (string, len); - STRING_SET_UNIBYTE (string); + STRING_SET_UNIBYTE (&string); } return Qnil; } diff --git a/src/lisp.h b/src/lisp.h index a8fe2e9f6bc..31c087eda73 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -1654,25 +1654,6 @@ STRING_MULTIBYTE (Lisp_Object str) #define STRING_BYTES_BOUND \ ((ptrdiff_t) min (MOST_POSITIVE_FIXNUM, min (SIZE_MAX, PTRDIFF_MAX) - 1)) -/* Mark STR as a unibyte string. */ -#define STRING_SET_UNIBYTE(STR) \ - do { \ - if (XSTRING (STR)->u.s.size == 0) \ - (STR) = empty_unibyte_string; \ - else \ - XSTRING (STR)->u.s.size_byte = -1; \ - } while (false) - -/* Mark STR as a multibyte string. Assure that STR contains only - ASCII characters in advance. */ -INLINE void -STRING_SET_MULTIBYTE (Lisp_Object str) -{ - /* The 0-length strings are unique&shared so we can't modify them. */ - eassert (XSTRING (str)->u.s.size > 0); - XSTRING (str)->u.s.size_byte = XSTRING (str)->u.s.size; -} - /* Convenience functions for dealing with Lisp strings. */ /* WARNING: Use the 'char *' pointers to string data with care in code @@ -4605,6 +4586,7 @@ list4i (intmax_t a, intmax_t b, intmax_t c, intmax_t d) extern Lisp_Object make_formatted_string (char *, const char *, ...) ATTRIBUTE_FORMAT_PRINTF (2, 3); extern Lisp_Object make_unibyte_string (const char *, ptrdiff_t); + extern ptrdiff_t vectorlike_nbytes (const union vectorlike_header *hdr); INLINE ptrdiff_t @@ -5242,6 +5224,32 @@ fast_c_string_match_ignore_case (Lisp_Object regexp, #endif extern Lisp_Object decode_env_path (const char *, const char *, bool); extern Lisp_Object empty_unibyte_string, empty_multibyte_string; + +/* Mark *STRPTR as a unibyte string. Modify *STRPTR if necessary. */ +INLINE void +STRING_SET_UNIBYTE (Lisp_Object *strptr) +{ + eassume (STRING_MULTIBYTE (*strptr)); + if (XSTRING (*strptr)->u.s.size == 0) + *strptr = empty_unibyte_string; + else + XSTRING (*strptr)->u.s.size_byte = -1; +} + +/* Mark *STRPTR as a multibyte string. Assumes that *STRPTR contains + only ASCII characters. Modify *STRPTR if necessary. */ +INLINE void +STRING_SET_MULTIBYTE (Lisp_Object *strptr) +{ + eassume (!STRING_MULTIBYTE (*strptr)); + for (ptrdiff_t i = 0; i < SBYTES (*strptr); i++) + eassume (ASCII_CHAR_P (SDATA (*strptr)[i])); + if (XSTRING (*strptr)->u.s.size == 0) + *strptr = empty_multibyte_string; + else + XSTRING (*strptr)->u.s.size_byte = XSTRING (*strptr)->u.s.size; +} + extern AVOID terminate_due_to_signal (int, int); #ifdef WINDOWSNT extern Lisp_Object Vlibrary_cache; diff --git a/src/print.c b/src/print.c index f3814859cb3..62c7780416a 100644 --- a/src/print.c +++ b/src/print.c @@ -819,7 +819,7 @@ DEFUN ("prin1-to-string", Fprin1_to_string, Sprin1_to_string, 1, 3, 0, set_buffer_internal (XBUFFER (Vprin1_to_string_buffer)); object = Fbuffer_string (); if (SBYTES (object) == SCHARS (object)) - STRING_SET_UNIBYTE (object); + STRING_SET_UNIBYTE (&object); /* Note that this won't make prepare_to_modify_buffer call ask-user-about-supersession-threat because this buffer (I'd also like to propose that (aset str 0 x) always should work on single-character strings; right now, if str contains a single unibyte non-ASCII string and x is >= 0x100, the aset will fail, which seems weird to me). Pip
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.