GNU bug report logs - #9318
23.3.50; The first call of encode-coding-region() returns wrong result on Windows

Previous Next

Package: emacs;

Reported by: Kazuhiro Ito <kzhr <at> d1.dion.ne.jp>

Date: Thu, 18 Aug 2011 09:04:02 UTC

Severity: normal

Found in version 23.3.50

Fixed in version 24.0.93

Done: Glenn Morris <rgm <at> gnu.org>

Bug is archived. No further changes may be made.

Full log


View this message in rfc822 format

From: Kenichi Handa <handa <at> m17n.org>
To: Kazuhiro Ito <kzhr <at> d1.dion.ne.jp>
Cc: cyd <at> stupidchicken.com, schwab <at> linux-m68k.org, 9318 <at> debbugs.gnu.org
Subject: bug#9318: 23.3.50; The first call of encode-coding-region() returns wrong result
Date: Thu, 01 Dec 2011 10:56:12 +0900
In article <20110830233131.C74A61E0043 <at> msa101.auone-net.jp>, Kazuhiro Ito <kzhr <at> d1.dion.ne.jp> writes:

> Here is the patch for the code, which contains Andreas' patch.  In my
> environment, the problems are fixed.  I think it would be better to
> change the interface of encode_designation_at_bol().

Oops, sorry, I had vaguely thought that your patch below
had already been applied, but I just noticed that it was not.
I'll soon commit a slightly modified version that includes the
improved interface for encode_designation_at_bol.

By the way, it would be good if we had a way to suppress
buffer text relocation temporarily.

---
Kenichi Handa
handa <at> m17n.org

> === modified file 'src/coding.c'
> --- src/coding.c	2011-05-09 09:59:23 +0000
> +++ src/coding.c	2011-08-28 07:33:54 +0000
> @@ -1026,6 +1026,54 @@
>        }									     \
>    } while (0)
 
> +#define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code)	\
> +  do {									\
> +    charset_map_loaded = 0;						\
> +    code = ENCODE_CHAR (charset, c);					\
> +    if (charset_map_loaded)						\
> +      {									\
> +	const unsigned char *orig = coding->destination;		\
> +	EMACS_INT offset;						\
> +									\
> +	coding_set_destination (coding);				\
> +	offset = coding->destination - orig;				\
> +	dst += offset;							\
> +	dst_end += offset;						\
> +      }									\
> +  } while (0)
> +
> +#define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
> +  do {									\
> +    charset_map_loaded = 0;						\
> +    charset = char_charset (c, charset_list, code_return);		\
> +    if (charset_map_loaded)						\
> +      {									\
> +	const unsigned char *orig = coding->destination;		\
> +	EMACS_INT offset;						\
> +									\
> +	coding_set_destination (coding);				\
> +	offset = coding->destination - orig;				\
> +	dst += offset;							\
> +	dst_end += offset;						\
> +      }									\
> +  } while (0)
> +
> +#define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \
> +  do {									\
> +    charset_map_loaded = 0;						\
> +    result = CHAR_CHARSET_P(c, charset);				\
> +    if (charset_map_loaded)						\
> +      {									\
> +	const unsigned char *orig = coding->destination;		\
> +	EMACS_INT offset;						\
> +									\
> +	coding_set_destination (coding);				\
> +	offset = coding->destination - orig;				\
> +	dst += offset;							\
> +	dst_end += offset;						\
> +      }									\
> +  } while (0)
> +
 
>  /* If there are at least BYTES length of room at dst, allocate memory
>     for coding->destination and update dst and dst_end.  We don't have
> @@ -2778,14 +2826,19 @@
 
>  	  if (preferred_charset_id >= 0)
>  	    {
> +	      int result;
> +
>  	      charset = CHARSET_FROM_ID (preferred_charset_id);
> -	      if (CHAR_CHARSET_P (c, charset))
> +	      CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
> +	      if (result)
>  		code = ENCODE_CHAR (charset, c);
>  	      else
> -		charset = char_charset (c, charset_list, &code);
> +		CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
> +				    &code, charset);
>  	    }
>  	  else
> -	    charset = char_charset (c, charset_list, &code);
> +	    CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
> +				&code, charset);
>  	  if (! charset)
>  	    {
>  	      c = coding->default_char;
> @@ -2794,7 +2847,8 @@
>  		  EMIT_ONE_ASCII_BYTE (c);
>  		  continue;
>  		}
> -	      charset = char_charset (c, charset_list, &code);
> +	      CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
> +				  &code, charset);
>  	    }
>  	  dimension = CHARSET_DIMENSION (charset);
>  	  emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
> @@ -4317,8 +4371,9 @@
 
>  #define ENCODE_ISO_CHARACTER(charset, c)				   \
>    do {									   \
> -    int code = ENCODE_CHAR ((charset),(c));				   \
> -									   \
> +    int code;								   \
> +    CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code);	   \
> +    									   \
>      if (CHARSET_DIMENSION (charset) == 1)				   \
>        ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);		   \
>      else								   \
> @@ -4476,7 +4531,17 @@
>        c = *charbuf++;
>        if (c == '\n')
>  	break;
> +
> +      charset_map_loaded = 0;
>        charset = char_charset (c, charset_list, NULL);
> +      if (charset_map_loaded)
> +	{
> +	  const unsigned char *orig = coding->destination;
> +
> +	  coding_set_destination (coding);
> +	  dst += coding->destination - orig;
> +	}
> +
>        id = CHARSET_ID (charset);
>        reg = CODING_ISO_REQUEST (coding, id);
>        if (reg >= 0 && r[reg] < 0)
> @@ -4543,6 +4608,12 @@
 
>  	  /* We have to produce designation sequences if any now.  */
>  	  dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
> +	  if (charset_map_loaded)
> +	    {
> +	      EMACS_INT offset = coding->destination + coding->dst_bytes - dst_end;
> +	      dst_end += offset;
> +	      dst_prev += offset;
> +	    }
>  	  bol_designation = 0;
>  	  /* We are sure that designation sequences are all ASCII bytes.  */
>  	  produced_chars += dst - dst_prev;
> @@ -4616,12 +4687,17 @@
 
>  	  if (preferred_charset_id >= 0)
>  	    {
> +	      int result;
> +
>  	      charset = CHARSET_FROM_ID (preferred_charset_id);
> -	      if (! CHAR_CHARSET_P (c, charset))
> -		charset = char_charset (c, charset_list, NULL);
> +	      CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
> +	      if (! result)
> +		CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
> +				    NULL, charset);
>  	    }
>  	  else
> -	    charset = char_charset (c, charset_list, NULL);
> +	    CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
> +				NULL, charset);
>  	  if (!charset)
>  	    {
>  	      if (coding->mode & CODING_MODE_SAFE_ENCODING)
> @@ -4632,7 +4708,8 @@
>  	      else
>  		{
>  		  c = coding->default_char;
> -		  charset = char_charset (c, charset_list, NULL);
> +		  CODING_CHAR_CHARSET(coding, dst, dst_end, c,
> +				      charset_list, NULL, charset);
>  		}
>  	    }
>  	  ENCODE_ISO_CHARACTER (charset, c);
> @@ -5064,7 +5141,9 @@
>        else
>  	{
>  	  unsigned code;
> -	  struct charset *charset = char_charset (c, charset_list, &code);
> +	  struct charset *charset;
> +	  CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
> +			      &code, charset);
 
>  	  if (!charset)
>  	    {
> @@ -5076,7 +5155,8 @@
>  	      else
>  		{
>  		  c = coding->default_char;
> -		  charset = char_charset (c, charset_list, &code);
> +		  CODING_CHAR_CHARSET(coding, dst, dst_end, c,
> +				      charset_list, &code, charset);
>  		}
>  	    }
>  	  if (code == CHARSET_INVALID_CODE (charset))
> @@ -5153,7 +5233,9 @@
>        else
>  	{
>  	  unsigned code;
> -	  struct charset *charset = char_charset (c, charset_list, &code);
> +	  struct charset *charset;
> +	  CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
> +			      &code, charset);
 
>  	  if (! charset)
>  	    {
> @@ -5165,7 +5247,8 @@
>  	      else
>  		{
>  		  c = coding->default_char;
> -		  charset = char_charset (c, charset_list, &code);
> +		  CODING_CHAR_CHARSET(coding, dst, dst_end, c,
> +				      charset_list, &code, charset);
>  		}
>  	    }
>  	  if (code == CHARSET_INVALID_CODE (charset))
> @@ -5747,7 +5831,9 @@
>  	}
>        else
>  	{
> -	  charset = char_charset (c, charset_list, &code);
> +	  CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list,
> +			      &code, charset);
> +
>  	  if (charset)
>  	    {
>  	      if (CHARSET_DIMENSION (charset) == 1)


> -- 
> Kazuhiro Ito





This bug report was last modified 13 years and 170 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.