GNU bug report logs - #9318
23.3.50; The first call of encode-coding-region() returns wrong result on Windows

Previous Next

Package: emacs;

Reported by: Kazuhiro Ito <kzhr <at> d1.dion.ne.jp>

Date: Thu, 18 Aug 2011 09:04:02 UTC

Severity: normal

Found in version 23.3.50

Fixed in version 24.0.93

Done: Glenn Morris <rgm <at> gnu.org>

Bug is archived. No further changes may be made.

Full log


Message #44 received at 9318 <at> debbugs.gnu.org (full text, mbox):

From: Kazuhiro Ito <kzhr <at> d1.dion.ne.jp>
To: Andreas Schwab <schwab <at> linux-m68k.org>
Cc: Eli Zaretskii <eliz <at> gnu.org>, Chong Yidong <cyd <at> stupidchicken.com>,
	9318 <at> debbugs.gnu.org
Subject: Re: bug#9318: 23.3.50;
	The first call of encode-coding-region() returns wrong result
Date: Wed, 31 Aug 2011 08:30:47 +0900
> > SUMMARY OF THE PROBLEM:
> > In encode_coding_XXX(), calling encode_char() could cause relocation
> > of buffers.  char_charset(), ENCODE_ISO_CHARACTER and ENCODE_CHAR
> > could also cause relocation because they could call encode_char().
> > After using them, coding->destination, dst, and dst_end should be
> > updated as needed.
> 
> I noticed that the CHAR_CHARSET_P macro slipped through my check.
> CHAR_CHARSET_P could also cause relocation of buffers.

Here is the patch for the code, which includes Andreas' patch.  In my
environment, it fixes the problems.  I think it would be better to
change the interface of encode_designation_at_bol().

=== modified file 'src/coding.c'
--- src/coding.c	2011-05-09 09:59:23 +0000
+++ src/coding.c	2011-08-28 07:33:54 +0000
@@ -1026,6 +1026,54 @@
       }									     \
   } while (0)
 
+#define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code)	\
+  do {									\
+    charset_map_loaded = 0;						\
+    code = ENCODE_CHAR (charset, c);					\
+    if (charset_map_loaded)						\
+      {									\
+	const unsigned char *orig = coding->destination;		\
+	EMACS_INT offset;						\
+									\
+	coding_set_destination (coding);				\
+	offset = coding->destination - orig;				\
+	dst += offset;							\
+	dst_end += offset;						\
+      }									\
+  } while (0)
+
+#define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
+  do {									\
+    charset_map_loaded = 0;						\
+    charset = char_charset (c, charset_list, code_return);		\
+    if (charset_map_loaded)						\
+      {									\
+	const unsigned char *orig = coding->destination;		\
+	EMACS_INT offset;						\
+									\
+	coding_set_destination (coding);				\
+	offset = coding->destination - orig;				\
+	dst += offset;							\
+	dst_end += offset;						\
+      }									\
+  } while (0)
+
+#define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \
+  do {									\
+    charset_map_loaded = 0;						\
+    result = CHAR_CHARSET_P(c, charset);				\
+    if (charset_map_loaded)						\
+      {									\
+	const unsigned char *orig = coding->destination;		\
+	EMACS_INT offset;						\
+									\
+	coding_set_destination (coding);				\
+	offset = coding->destination - orig;				\
+	dst += offset;							\
+	dst_end += offset;						\
+      }									\
+  } while (0)
+
 
 /* If there are at least BYTES length of room at dst, allocate memory
    for coding->destination and update dst and dst_end.  We don't have
@@ -2778,14 +2826,19 @@
 
 	  if (preferred_charset_id >= 0)
 	    {
+	      int result;
+
 	      charset = CHARSET_FROM_ID (preferred_charset_id);
-	      if (CHAR_CHARSET_P (c, charset))
+	      CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
+	      if (result)
 		code = ENCODE_CHAR (charset, c);
 	      else
-		charset = char_charset (c, charset_list, &code);
+		CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
+				    &code, charset);
 	    }
 	  else
-	    charset = char_charset (c, charset_list, &code);
+	    CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
+				&code, charset);
 	  if (! charset)
 	    {
 	      c = coding->default_char;
@@ -2794,7 +2847,8 @@
 		  EMIT_ONE_ASCII_BYTE (c);
 		  continue;
 		}
-	      charset = char_charset (c, charset_list, &code);
+	      CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
+				  &code, charset);
 	    }
 	  dimension = CHARSET_DIMENSION (charset);
 	  emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
@@ -4317,8 +4371,9 @@
 
 #define ENCODE_ISO_CHARACTER(charset, c)				   \
   do {									   \
-    int code = ENCODE_CHAR ((charset),(c));				   \
-									   \
+    int code;								   \
+    CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code);	   \
+    									   \
     if (CHARSET_DIMENSION (charset) == 1)				   \
       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);		   \
     else								   \
@@ -4476,7 +4531,17 @@
       c = *charbuf++;
       if (c == '\n')
 	break;
+
+      charset_map_loaded = 0;
       charset = char_charset (c, charset_list, NULL);
+      if (charset_map_loaded)
+	{
+	  const unsigned char *orig = coding->destination;
+
+	  coding_set_destination (coding);
+	  dst += coding->destination - orig;
+	}
+
       id = CHARSET_ID (charset);
       reg = CODING_ISO_REQUEST (coding, id);
       if (reg >= 0 && r[reg] < 0)
@@ -4543,6 +4608,12 @@
 
 	  /* We have to produce designation sequences if any now.  */
 	  dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
+	  if (charset_map_loaded)
+	    {
+	      EMACS_INT offset = coding->destination + coding->dst_bytes - dst_end;
+	      dst_end += offset;
+	      dst_prev += offset;
+	    }
 	  bol_designation = 0;
 	  /* We are sure that designation sequences are all ASCII bytes.  */
 	  produced_chars += dst - dst_prev;
@@ -4616,12 +4687,17 @@
 
 	  if (preferred_charset_id >= 0)
 	    {
+	      int result;
+
 	      charset = CHARSET_FROM_ID (preferred_charset_id);
-	      if (! CHAR_CHARSET_P (c, charset))
-		charset = char_charset (c, charset_list, NULL);
+	      CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
+	      if (! result)
+		CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
+				    NULL, charset);
 	    }
 	  else
-	    charset = char_charset (c, charset_list, NULL);
+	    CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
+				NULL, charset);
 	  if (!charset)
 	    {
 	      if (coding->mode & CODING_MODE_SAFE_ENCODING)
@@ -4632,7 +4708,8 @@
 	      else
 		{
 		  c = coding->default_char;
-		  charset = char_charset (c, charset_list, NULL);
+		  CODING_CHAR_CHARSET(coding, dst, dst_end, c,
+				      charset_list, NULL, charset);
 		}
 	    }
 	  ENCODE_ISO_CHARACTER (charset, c);
@@ -5064,7 +5141,9 @@
       else
 	{
 	  unsigned code;
-	  struct charset *charset = char_charset (c, charset_list, &code);
+	  struct charset *charset;
+	  CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
+			      &code, charset);
 
 	  if (!charset)
 	    {
@@ -5076,7 +5155,8 @@
 	      else
 		{
 		  c = coding->default_char;
-		  charset = char_charset (c, charset_list, &code);
+		  CODING_CHAR_CHARSET(coding, dst, dst_end, c,
+				      charset_list, &code, charset);
 		}
 	    }
 	  if (code == CHARSET_INVALID_CODE (charset))
@@ -5153,7 +5233,9 @@
       else
 	{
 	  unsigned code;
-	  struct charset *charset = char_charset (c, charset_list, &code);
+	  struct charset *charset;
+	  CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
+			      &code, charset);
 
 	  if (! charset)
 	    {
@@ -5165,7 +5247,8 @@
 	      else
 		{
 		  c = coding->default_char;
-		  charset = char_charset (c, charset_list, &code);
+		  CODING_CHAR_CHARSET(coding, dst, dst_end, c,
+				      charset_list, &code, charset);
 		}
 	    }
 	  if (code == CHARSET_INVALID_CODE (charset))
@@ -5747,7 +5831,9 @@
 	}
       else
 	{
-	  charset = char_charset (c, charset_list, &code);
+	  CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
+			      &code, charset);
+
 	  if (charset)
 	    {
 	      if (CHARSET_DIMENSION (charset) == 1)


-- 
Kazuhiro Ito




This bug report was last modified 13 years and 170 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.