GNU bug report logs -
#8344
(substring ...) crashes on large vectors
Previous Next
Reported by: Paul Eggert <eggert <at> cs.ucla.edu>
Date: Fri, 25 Mar 2011 18:27:02 UTC
Severity: normal
Done: Paul Eggert <eggert <at> cs.ucla.edu>
Bug is archived. No further changes may be made.
To add a comment to this bug, you must first unarchive it, by sending
a message to control AT debbugs.gnu.org, with unarchive 8344 in the body.
You can then email your comments to 8344 AT debbugs.gnu.org in the normal way.
Toggle the display of automated, internal messages from the tracker.
Report forwarded
to
owner <at> debbugs.gnu.org, bug-gnu-emacs <at> gnu.org
:
bug#8344
; Package
emacs
.
(Fri, 25 Mar 2011 18:27:02 GMT)
Full text and
rfc822 format available.
Acknowledgement sent
to
Paul Eggert <eggert <at> cs.ucla.edu>
:
New bug report received and forwarded. Copy sent to
bug-gnu-emacs <at> gnu.org
.
(Fri, 25 Mar 2011 18:27:02 GMT)
Full text and
rfc822 format available.
Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):
I found this problem while compiling the Emacs trunk with
gcc -Wstrict-overflow.
Currently, on a 64-bit machine, (substring VEC FROM TO)
fails if TO and FROM are valid indexes and
TO - FROM is 2**31 or greater. On typical hosts there
can be buffer overruns or crashes.
The problem is that (substring ...) internally calls
(vector ...), and (vector ...) cannot create a vector
whose length is 2**31 or greater, because it follows
the Emacs convention that varargs functions count the
number of arguments using an 'int'.
The simplest and most general way to address this problem
is to change the Emacs convention to use EMACS_INT rather
than 'int' to count the number of arguments to a function.
I'm preparing a patch along those lines. The changes
to lisp.h are below; the other changes should be
straightforward albeit tedious.
Before I work any more on this, can anyone see a reason
not to do this?
=== modified file 'src/lisp.h'
--- src/lisp.h 2011-03-22 09:08:11 +0000
+++ src/lisp.h 2011-03-24 08:54:15 +0000
@@ -964,7 +964,7 @@ struct Lisp_Subr
Lisp_Object (*a7) (Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object);
Lisp_Object (*a8) (Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object);
Lisp_Object (*aUNEVALLED) (Lisp_Object args);
- Lisp_Object (*aMANY) (int, Lisp_Object *);
+ Lisp_Object (*aMANY) (EMACS_INT, Lisp_Object *);
} function;
short min_args, max_args;
const char *symbol_name;
@@ -1809,7 +1809,7 @@ typedef struct {
/* Note that the weird token-substitution semantics of ANSI C makes
this work for MANY and UNEVALLED. */
-#define DEFUN_ARGS_MANY (int, Lisp_Object *)
+#define DEFUN_ARGS_MANY (EMACS_INT, Lisp_Object *)
#define DEFUN_ARGS_UNEVALLED (Lisp_Object)
#define DEFUN_ARGS_0 (void)
#define DEFUN_ARGS_1 (Lisp_Object)
@@ -2079,7 +2079,7 @@ struct gcpro
volatile Lisp_Object *var;
/* Number of consecutive protected variables. */
- int nvars;
+ EMACS_INT nvars;
#ifdef DEBUG_GCPRO
int level;
@@ -2860,7 +2860,7 @@ extern Lisp_Object internal_lisp_conditi
extern Lisp_Object internal_condition_case (Lisp_Object (*) (void), Lisp_Object, Lisp_Object (*) (Lisp_Object));
extern Lisp_Object internal_condition_case_1 (Lisp_Object (*) (Lisp_Object), Lisp_Object, Lisp_Object, Lisp_Object (*) (Lisp_Object));
extern Lisp_Object internal_condition_case_2 (Lisp_Object (*) (Lisp_Object, Lisp_Object), Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object (*) (Lisp_Object));
-extern Lisp_Object internal_condition_case_n (Lisp_Object (*) (int, Lisp_Object *), int, Lisp_Object *, Lisp_Object, Lisp_Object (*) (Lisp_Object));
+extern Lisp_Object internal_condition_case_n (Lisp_Object (*) (EMACS_INT, Lisp_Object *), EMACS_INT, Lisp_Object *, Lisp_Object, Lisp_Object (*) (Lisp_Object));
extern void specbind (Lisp_Object, Lisp_Object);
extern void record_unwind_protect (Lisp_Object (*) (Lisp_Object), Lisp_Object);
extern Lisp_Object unbind_to (int, Lisp_Object);
@@ -2870,7 +2870,7 @@ extern void do_autoload (Lisp_Object, Li
extern Lisp_Object un_autoload (Lisp_Object);
EXFUN (Ffetch_bytecode, 1);
extern void init_eval_once (void);
-extern Lisp_Object safe_call (int, Lisp_Object *);
+extern Lisp_Object safe_call (EMACS_INT, Lisp_Object *);
extern Lisp_Object safe_call1 (Lisp_Object, Lisp_Object);
extern Lisp_Object safe_call2 (Lisp_Object, Lisp_Object, Lisp_Object);
extern void init_eval (void);
Information forwarded
to
owner <at> debbugs.gnu.org, bug-gnu-emacs <at> gnu.org
:
bug#8344
; Package
emacs
.
(Fri, 25 Mar 2011 21:26:02 GMT)
Full text and
rfc822 format available.
Message #8 received at 8344 <at> debbugs.gnu.org (full text, mbox):
> The problem is that (substring ...) internally calls
> (vector ...), and (vector ...) cannot create a vector
> whose length is 2**31 or greater, because it follows
> the Emacs convention that varargs functions count the
> number of arguments using an 'int'.
I'm pretty sure that even with your patch such a vector would bump into
all kinds of other problems.
Buffers larger than 2GB are already causing trouble right now. So if
you want to fix things, I'd urge you to focus on "handling an 8GB file"
(on 32bit systems this won't work, but on 64bit it should but didn't
last time I tried).
Stefan
Information forwarded
to
owner <at> debbugs.gnu.org, bug-gnu-emacs <at> gnu.org
:
bug#8344
; Package
emacs
.
(Fri, 25 Mar 2011 21:49:02 GMT)
Full text and
rfc822 format available.
Message #11 received at 8344 <at> debbugs.gnu.org (full text, mbox):
On 03/25/2011 02:24 PM, Stefan Monnier wrote:
> I'd urge you to focus on "handling an 8GB file"
> (on 32bit systems this won't work, but on 64bit it should but didn't
> last time I tried)
Yes, thanks, that is on my list of things to do. This
nargs business is one of the (easier) steps needed
to get it done. Another (easier) step will be to
merge the gnulib fixes for regular expressions into
Emacs, since gnulib has fixed several problems with
matching buffers larger than 2**31 bytes.
There are several other steps, of course, and some
will be harder.
To some extent I'm using gcc -Wstrict-overflow
as a first pass, to fix the more-obvious gotchas, in such
a way that regressions can be caught statically. Many
of the other changes won't be so easy, but by then I hope
to understand Emacs internals better. For example,
the Emacs regex code fails the static checks now, and
that is why I want to turn to the regex code at some point in the
near future.
Information forwarded
to
owner <at> debbugs.gnu.org, bug-gnu-emacs <at> gnu.org
:
bug#8344
; Package
emacs
.
(Sun, 27 Mar 2011 02:20:03 GMT)
Full text and
rfc822 format available.
Message #14 received at 8344 <at> debbugs.gnu.org (full text, mbox):
[Message part 1 (text/plain, inline)]
Here's the full patch for this, which I mentioned earlier,
to give you a feel for what's needed.
It's tedious, but is pretty straightforward.
I'll test it more before committing.
[patch.txt (text/plain, attachment)]
Information forwarded
to
owner <at> debbugs.gnu.org, bug-gnu-emacs <at> gnu.org
:
bug#8344
; Package
emacs
.
(Sun, 27 Mar 2011 07:54:02 GMT)
Full text and
rfc822 format available.
Message #17 received at 8344 <at> debbugs.gnu.org (full text, mbox):
Paul Eggert <eggert <at> cs.ucla.edu> writes:
> @@ -373,10 +373,10 @@
> path = Fsubstring (path, make_number (2), Qnil);
>
> new_argv_volatile = new_argv = (const unsigned char **)
> - alloca (max (2, nargs - 2) * sizeof (char *));
> + alloca ((nargs > 4 ? nargs - 2 : 2) * sizeof (char *));
That should perhaps be converted to use SAFE_ALLOCA.
Andreas.
--
Andreas Schwab, schwab <at> linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5
"And now for something completely different."
Information forwarded
to
owner <at> debbugs.gnu.org, bug-gnu-emacs <at> gnu.org
:
bug#8344
; Package
emacs
.
(Sun, 27 Mar 2011 09:11:01 GMT)
Full text and
rfc822 format available.
Message #20 received at 8344 <at> debbugs.gnu.org (full text, mbox):
On 03/27/2011 12:52 AM, Andreas Schwab wrote:
> That should perhaps be converted to use SAFE_ALLOCA.
Thanks, here's a patch for that, which I'll throw into
the pile of patches I'm testing.
* callproc.c (Fcall_process, Fcall_process_region): Use SAFE_ALLOCA
instead of alloca (Bug#8344).
=== modified file 'src/callproc.c'
--- src/callproc.c 2011-03-27 02:12:36 +0000
+++ src/callproc.c 2011-03-27 08:59:56 +0000
@@ -189,6 +189,7 @@
char buf[CALLPROC_BUFFER_SIZE_MAX];
int bufsize = CALLPROC_BUFFER_SIZE_MIN;
int count = SPECPDL_INDEX ();
+ volatile USE_SAFE_ALLOCA;
const unsigned char **volatile new_argv_volatile;
register const unsigned char **new_argv;
@@ -242,7 +243,7 @@
val = Qraw_text;
else
{
- args2 = (Lisp_Object *) alloca ((nargs + 1) * sizeof *args2);
+ SAFE_ALLOCA (args2, Lisp_Object *, (nargs + 1) * sizeof *args2);
args2[0] = Qcall_process;
for (i = 0; i < nargs; i++) args2[i + 1] = args[i];
coding_systems = Ffind_operation_coding_system (nargs + 1, args2);
@@ -372,8 +373,9 @@
&& SREF (path, 1) == ':')
path = Fsubstring (path, make_number (2), Qnil);
- new_argv_volatile = new_argv = (const unsigned char **)
- alloca ((nargs > 4 ? nargs - 2 : 2) * sizeof (char *));
+ SAFE_ALLOCA (new_argv, const unsigned char **,
+ (nargs > 4 ? nargs - 2 : 2) * sizeof *new_argv);
+ new_argv_volatile = new_argv;
if (nargs > 4)
{
register size_t i;
@@ -645,7 +647,7 @@
{
size_t i;
- args2 = (Lisp_Object *) alloca ((nargs + 1) * sizeof *args2);
+ SAFE_ALLOCA (args2, Lisp_Object *, (nargs + 1) * sizeof *args2);
args2[0] = Qcall_process;
for (i = 0; i < nargs; i++) args2[i + 1] = args[i];
coding_systems
@@ -809,6 +811,7 @@
when exiting. */
call_process_exited = 1;
+ SAFE_FREE ();
unbind_to (count, Qnil);
if (synch_process_termsig)
@@ -897,30 +900,35 @@
#endif
}
- pattern = Fexpand_file_name (Vtemp_file_name_pattern, tmpdir);
- tempfile = (char *) alloca (SBYTES (pattern) + 1);
- memcpy (tempfile, SDATA (pattern), SBYTES (pattern) + 1);
- coding_systems = Qt;
+ {
+ USE_SAFE_ALLOCA;
+ pattern = Fexpand_file_name (Vtemp_file_name_pattern, tmpdir);
+ SAFE_ALLOCA (tempfile, char *, SBYTES (pattern) + 1);
+ memcpy (tempfile, SDATA (pattern), SBYTES (pattern) + 1);
+ coding_systems = Qt;
#ifdef HAVE_MKSTEMP
- {
- int fd;
+ {
+ int fd;
- BLOCK_INPUT;
- fd = mkstemp (tempfile);
- UNBLOCK_INPUT;
- if (fd == -1)
- report_file_error ("Failed to open temporary file",
- Fcons (Vtemp_file_name_pattern, Qnil));
- else
- close (fd);
- }
+ BLOCK_INPUT;
+ fd = mkstemp (tempfile);
+ UNBLOCK_INPUT;
+ if (fd == -1)
+ report_file_error ("Failed to open temporary file",
+ Fcons (Vtemp_file_name_pattern, Qnil));
+ else
+ close (fd);
+ }
#else
- mktemp (tempfile);
+ mktemp (tempfile);
#endif
- filename_string = build_string (tempfile);
- GCPRO1 (filename_string);
+ filename_string = build_string (tempfile);
+ GCPRO1 (filename_string);
+ SAFE_FREE ();
+ }
+
start = args[0];
end = args[1];
/* Decide coding-system of the contents of the temporary file. */
@@ -930,11 +938,13 @@
val = Qraw_text;
else
{
- args2 = (Lisp_Object *) alloca ((nargs + 1) * sizeof *args2);
+ USE_SAFE_ALLOCA;
+ SAFE_ALLOCA (args2, Lisp_Object *, (nargs + 1) * sizeof *args2);
args2[0] = Qcall_process_region;
for (i = 0; i < nargs; i++) args2[i + 1] = args[i];
coding_systems = Ffind_operation_coding_system (nargs + 1, args2);
val = CONSP (coding_systems) ? XCDR (coding_systems) : Qnil;
+ SAFE_FREE ();
}
val = complement_process_encoding_system (val);
Reply sent
to
Paul Eggert <eggert <at> cs.ucla.edu>
:
You have taken responsibility.
(Wed, 30 Mar 2011 00:54:03 GMT)
Full text and
rfc822 format available.
Notification sent
to
Paul Eggert <eggert <at> cs.ucla.edu>
:
bug acknowledged by developer.
(Wed, 30 Mar 2011 00:54:03 GMT)
Full text and
rfc822 format available.
Message #25 received at 8344-done <at> debbugs.gnu.org (full text, mbox):
I committed a fix to the trunk for this,
as part of a recent merge (bzr 103776).
For Bug#8344, the merge uses size_t for argument counts,
rather than EMACS_INT as I proposed earlier,
since the argument counts are always nonnegative
and are limited just by sizes that can be counted
at the C level.
bug archived.
Request was from
Debbugs Internal Request <help-debbugs <at> gnu.org>
to
internal_control <at> debbugs.gnu.org
.
(Wed, 27 Apr 2011 11:24:04 GMT)
Full text and
rfc822 format available.
This bug report was last modified 14 years and 112 days ago.
Previous Next
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.