GNU bug report logs - #3745
23.0.95; emacs-23.0.95: unibyte-display-via-language-environment

Previous Next

Package: emacs;

Reported by: Jay Berkenbilt <ejb <at> ql.org>

Date: Fri, 3 Jul 2009 01:45:04 UTC

Severity: normal

Done: Chong Yidong <cyd <at> stupidchicken.com>

Bug is archived. No further changes may be made.

Full log


View this message in rfc822 format

From: Kenichi Handa <handa <at> m17n.org>
To: 3745 <at> debbugs.gnu.org
Cc: cyd <at> stupidchicken.com, 3745 <at> debbugs.gnu.org
Subject: bug#3745: 23.0.95; emacs-23.0.95: unibyte-display-via-language-environment
Date: Mon, 06 Jul 2009 15:50:58 +0900
In article <tl74otqk501.fsf <at> m17n.org>, Kenichi Handa <handa <at> m17n.org> writes:

> But, using unibyte_char_to_multibyte here is a clear bug.
> If the overhead by DECODE_CHAR is untolerable (I don't
> believe it), we can do this:

> (1) modify unibyte_char_to_multibyte to use BYTE8_TO_CHAR
>     instead of the table unibyte_to_multibyte_table.
> (2) Setup unibyte_to_multibyte_table for unibyte_charset.
> (3) Just lookup that table in x_produce_glyphs.

To minimize the changes, I made the attached patch.  It
doesn't touch unibyte_to_multibyte_table, but introduced
charset_unibyte_decoder[128].  I confirmed it didn't make
the display code slow.

---
Kenichi Handa
handa <at> m17n.org

Index: character.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/character.c,v
retrieving revision 1.24
diff -u -r1.24 character.c
--- character.c	5 Feb 2009 08:46:52 -0000	1.24
+++ character.c	6 Jul 2009 06:42:31 -0000
@@ -90,9 +90,9 @@
 /* Mapping table from unibyte chars to multibyte chars.  */
 int unibyte_to_multibyte_table[256];
 
-/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
-   char.  */
-char unibyte_has_multibyte_table[256];
+/* Decoding table for 8-bit byte codes of the charset charset_unibyte.
+   Nth element is for the code (N-0x80).  */
+int charset_unibyte_decoder[128];
 
 
 
@@ -270,9 +270,8 @@
   return c;
 }
 
-/* Convert the multibyte character C to unibyte 8-bit character based
-   on the current value of charset_unibyte.  If dimension of
-   charset_unibyte is more than one, return (C & 0xFF).
+/* Convert ASCII or 8-bit character C to unibyte.  If C is none of
+   them, return (C & 0xFF).
 
    The argument REV_TBL is now ignored.  It will be removed in the
    future.  */
@@ -282,14 +281,11 @@
      int c;
      Lisp_Object rev_tbl;
 {
-  struct charset *charset;
-  unsigned c1;
-
+  if (c < 0x80)
+    return c;
   if (CHAR_BYTE8_P (c))
     return CHAR_TO_BYTE8 (c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c1 = ENCODE_CHAR (charset, c);
-  return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
+  return (c & 0xFF);
 }
 
 /* Like multibyte_char_to_unibyte, but return -1 if C is not supported
@@ -302,11 +298,11 @@
   struct charset *charset;
   unsigned c1;
 
+  if (c < 0x80)
+    return c;
   if (CHAR_BYTE8_P (c))
     return CHAR_TO_BYTE8 (c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c1 = ENCODE_CHAR (charset, c);
-  return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : -1);
+  return -1;
 }
 
 DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
@@ -337,10 +333,8 @@
   c = XFASTINT (ch);
   if (c >= 0400)
     error ("Invalid unibyte character: %d", c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c = DECODE_CHAR (charset, c);
-  if (c < 0)
-    c = BYTE8_TO_CHAR (XFASTINT (ch));
+  if (c >= 0x80)
+    c = BYTE8_TO_CHAR (c);
   return make_number (c);
 }
 
Index: character.h
===================================================================
RCS file: /cvsroot/emacs/emacs/src/character.h,v
retrieving revision 1.15
diff -u -r1.15 character.h
--- character.h	8 Jan 2009 03:15:27 -0000	1.15
+++ character.h	6 Jul 2009 06:42:31 -0000
@@ -87,11 +87,15 @@
 #define unibyte_char_to_multibyte(c)	\
   ((c) < 256 ? unibyte_to_multibyte_table[(c)] : (c))
 
-/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
-   char.  */
-extern char unibyte_has_multibyte_table[256];
-
-#define UNIBYTE_CHAR_HAS_MULTIBYTE_P(c) (unibyte_has_multibyte_table[(c)])
+/* Decoding table for 8-bit byte codes of the charset charset_unibyte.
+   Nth element is for the code (N-0x80).  */
+extern int charset_unibyte_decoder[128];
+
+/* Return a character correspoinding to the code BYTE of
+   charset_unibyte.  BYTE must be a byte; i.e. less than 0x100.  If
+   BYTE is not a valid code of charset_unibyte, return -1.  */
+#define DECODE_UNIBYTE(BYTE)	\
+  ((BYTE) < 0x80 ? (int) (BYTE) : charset_unibyte_decoder[(BYTE) - 0x80])
 
 /* If C is not ASCII, make it unibyte. */
 #define MAKE_CHAR_UNIBYTE(c)	\
Index: charset.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/charset.c,v
retrieving revision 1.179
diff -u -r1.179 charset.c
--- charset.c	9 Jun 2009 02:53:07 -0000	1.179
+++ charset.c	6 Jul 2009 06:42:32 -0000
@@ -2260,6 +2260,7 @@
   Vcharset_ordered_list = Fnconc (2, arglist);
   charset_ordered_list_tick++;
 
+  charset_unibyte = -1;
   for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
        CONSP (old_list); old_list = XCDR (old_list))
     {
@@ -2267,9 +2268,25 @@
 	list_2022 = Fcons (XCAR (old_list), list_2022);
       if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
 	list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
+      if (charset_unibyte < 0)
+	{
+	  struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
+
+	  if (CHARSET_DIMENSION (charset) == 1
+	      && CHARSET_ASCII_COMPATIBLE_P (charset)
+	      && CHARSET_MAX_CHAR (charset) >= 0x80)
+	    charset_unibyte = CHARSET_ID (charset);
+	}
     }
   Viso_2022_charset_list = Fnreverse (list_2022);
   Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
+  if (charset_unibyte < 0)
+    charset_unibyte = charset_iso_8859_1;
+  {
+    struct charset *charset = CHARSET_FROM_ID (charset_unibyte);
+    for (i = 128; i < 256; i++)
+      charset_unibyte_decoder[i - 128] = DECODE_CHAR (charset, i);
+  }
 
   return Qnil;
 }
@@ -2328,6 +2345,10 @@
     unibyte_to_multibyte_table[i] = i;
   for (; i < 256; i++)
     unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
+  for (i = 0; i < 32; i++)
+    charset_unibyte_decoder[i] = -1;
+  for (; i < 128; i++)
+    charset_unibyte_decoder[i] = 128 + i;
 }
 
 #ifdef emacs
@@ -2429,6 +2450,7 @@
     = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
 			       128, 255, -1, 0, -1, 0, 1,
 			       MAX_5_BYTE_CHAR + 1);
+  charset_unibyte = charset_iso_8859_1;
 }
 
 #endif /* emacs */
Index: xdisp.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/xdisp.c,v
retrieving revision 1.1288
diff -u -r1.1288 xdisp.c
--- xdisp.c	18 Jun 2009 09:49:07 -0000	1.1288
+++ xdisp.c	6 Jul 2009 06:42:34 -0000
@@ -5743,7 +5743,7 @@
 				  || it->c == 0xAD /* SOFT HYPHEN */)))
 		       : (it->c >= 127
 			  && (! unibyte_display_via_language_environment
-			      || (UNIBYTE_CHAR_HAS_MULTIBYTE_P (it->c)))))))
+			      || (DECODE_UNIBYTE (it->c) <= 0xA0))))))
 	    {
 	      /* IT->c is a control character which must be displayed
 		 either as '\003' or as `^C' where the '\\' and '^'
@@ -21196,9 +21196,8 @@
 	{
 	  if (SINGLE_BYTE_CHAR_P (it->c)
 	      && unibyte_display_via_language_environment)
-	    it->char_to_display = unibyte_char_to_multibyte (it->c);
-	  if (! SINGLE_BYTE_CHAR_P (it->char_to_display))
 	    {
+	      it->char_to_display = DECODE_UNIBYTE (it->c);
 	      it->multibyte_p = 1;
 	      it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display,
 					   -1, Qnil);



This bug report was last modified 15 years and 322 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.