GNU bug report logs - #8602
Lisp reader mishandles large non-base-10 integers on 64-bit hosts

Previous Next

Package: emacs;

Reported by: Paul Eggert <eggert <at> cs.ucla.edu>

Date: Mon, 2 May 2011 03:04:01 UTC

Severity: normal

Done: Paul Eggert <eggert <at> cs.ucla.edu>

Bug is archived. No further changes may be made.

Full log


Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):

From: Paul Eggert <eggert <at> cs.ucla.edu>
To: bug-gnu-emacs <at> gnu.org
Subject: Lisp reader mishandles large non-base-10 integers on 64-bit hosts
Date: Sun, 01 May 2011 20:02:55 -0700
In the Emacs trunk I found some more problems with the Lisp
reader and large integers.  It uses a floating-point number
to keep track of the integer's value, which leads to incorrect
answers with large integers on 64-bit hosts.  In some cases the
errors are fairly extreme.  I plan to install the following patch
after some more testing.

* lread.c (read_integer): Be more consistent with string-to-number.
Use string_to_number to do the actual conversion; this avoids
rounding errors and fixes some other screwups.  Without this fix,
for example, #x1fffffffffffffff was misread as -2305843009213693952.
(digit_to_number): Move earlier, for benefit of read_integer.
Return -1 if the digit is out of range for the base, -2 if it is
not a digit in any supported base.
=== modified file 'src/lread.c'
--- src/lread.c	2011-04-29 07:55:25 +0000
+++ src/lread.c	2011-05-02 02:18:43 +0000
@@ -2245,6 +2245,26 @@
     }
 }

+/* Return the digit that CHARACTER stands for in the given BASE.
+   Return -1 if CHARACTER is out of range for BASE,
+   and -2 if CHARACTER is not valid for any supported BASE.  */
+static inline int
+digit_to_number (int character, int base)
+{
+  int digit;
+
+  if ('0' <= character && character <= '9')
+    digit = character - '0';
+  else if ('a' <= character && character <= 'z')
+    digit = character - 'a' + 10;
+  else if ('A' <= character && character <= 'Z')
+    digit = character - 'A' + 10;
+  else
+    return -2;
+
+  return digit < base ? digit : -1;
+}
+
 /* Read an integer in radix RADIX using READCHARFUN to read
    characters.  RADIX must be in the interval [2..36]; if it isn't, a
    read error is signaled .  Value is the integer read.  Signals an
@@ -2254,59 +2274,64 @@
 static Lisp_Object
 read_integer (Lisp_Object readcharfun, int radix)
 {
-  int ndigits = 0, invalid_p, c, sign = 0;
-  /* We use a floating point number because  */
-  double number = 0;
+  /* Room for sign, leading 0, other digits, trailing null byte.  */
+  char buf[1 + 1 + sizeof (uintmax_t) * CHAR_BIT + 1];
+
+  int valid = -1; /* 1 if valid, 0 if not, -1 if incomplete.  */

   if (radix < 2 || radix > 36)
-    invalid_p = 1;
+    valid = 0;
   else
     {
-      number = ndigits = invalid_p = 0;
-      sign = 1;
+      char *p = buf;
+      int c, digit;

       c = READCHAR;
-      if (c == '-')
+      if (c == '-' || c == '+')
 	{
+	  *p++ = c;
 	  c = READCHAR;
-	  sign = -1;
-	}
-      else if (c == '+')
-	c = READCHAR;
-
-      while (c >= 0)
-	{
-	  int digit;
-
-	  if (c >= '0' && c <= '9')
-	    digit = c - '0';
-	  else if (c >= 'a' && c <= 'z')
-	    digit = c - 'a' + 10;
-	  else if (c >= 'A' && c <= 'Z')
-	    digit = c - 'A' + 10;
+	}
+
+      if (c == '0')
+	{
+	  *p++ = c;
+	  valid = 1;
+
+	  /* Ignore redundant leading zeros, so the buffer doesn't
+	     fill up with them.  */
+	  do
+	    c = READCHAR;
+	  while (c == '0');
+	}
+
+      while (-1 <= (digit = digit_to_number (c, radix)))
+	{
+	  if (digit == -1)
+	    valid = 0;
+	  if (valid < 0)
+	    valid = 1;
+
+	  if (p < buf + sizeof buf - 1)
+	    *p++ = c;
 	  else
-	    {
-	      UNREAD (c);
-	      break;
-	    }
-
-	  if (digit < 0 || digit >= radix)
-	    invalid_p = 1;
-
-	  number = radix * number + digit;
-	  ++ndigits;
+	    valid = 0;
+
 	  c = READCHAR;
 	}
+
+      if (c >= 0)
+	UNREAD (c);
+      *p = '\0';
     }

-  if (ndigits == 0 || invalid_p)
+  if (! valid)
     {
-      char buf[50];
       sprintf (buf, "integer, radix %d", radix);
       invalid_syntax (buf, 0);
     }

-  return make_fixnum_or_float (sign * number);
+  return string_to_number (buf, radix, 0);
 }


@@ -3165,23 +3190,6 @@
 }

 
-static inline int
-digit_to_number (int character, int base)
-{
-  int digit;
-
-  if ('0' <= character && character <= '9')
-    digit = character - '0';
-  else if ('a' <= character && character <= 'z')
-    digit = character - 'a' + 10;
-  else if ('A' <= character && character <= 'Z')
-    digit = character - 'A' + 10;
-  else
-    return -1;
-
-  return digit < base ? digit : -1;
-}
-
 #define LEAD_INT 1
 #define DOT_CHAR 2
 #define TRAIL_INT 4





This bug report was last modified 14 years and 16 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.