Package: emacs;
Reported by: Jay Berkenbilt <ejb <at> ql.org>
Date: Fri, 3 Jul 2009 01:45:04 UTC
Severity: normal
Done: Chong Yidong <cyd <at> stupidchicken.com>
Bug is archived. No further changes may be made.
View this message in rfc822 format
From: Kenichi Handa <handa <at> m17n.org> To: 3745 <at> debbugs.gnu.org Cc: cyd <at> stupidchicken.com, 3745 <at> debbugs.gnu.org Subject: bug#3745: 23.0.95; emacs-23.0.95: unibyte-display-via-language-environment Date: Mon, 06 Jul 2009 15:50:58 +0900
In article <tl74otqk501.fsf <at> m17n.org>, Kenichi Handa <handa <at> m17n.org> writes: > But, using unibyte_char_to_multibyte here is a clear bug. > If the overhead by DECODE_CHAR is intolerable (I don't > believe it), we can do this: > (1) modify unibyte_char_to_multibyte to use BYTE8_TO_CHAR > instead of the table unibyte_to_multibyte_table. > (2) Set up unibyte_to_multibyte_table for unibyte_charset. > (3) Just look up that table in x_produce_glyphs. To minimize the changes, I made the attached patch. It doesn't touch unibyte_to_multibyte_table, but introduces charset_unibyte_decoder[128]. I confirmed it didn't make the display code slow. --- Kenichi Handa handa <at> m17n.org Index: character.c =================================================================== RCS file: /cvsroot/emacs/emacs/src/character.c,v retrieving revision 1.24 diff -u -r1.24 character.c --- character.c 5 Feb 2009 08:46:52 -0000 1.24 +++ character.c 6 Jul 2009 06:42:31 -0000 @@ -90,9 +90,9 @@ /* Mapping table from unibyte chars to multibyte chars. */ int unibyte_to_multibyte_table[256]; -/* Nth element is 1 iff unibyte char N can be mapped to a multibyte - char. */ -char unibyte_has_multibyte_table[256]; +/* Decoding table for 8-bit byte codes of the charset charset_unibyte. + Nth element is for the code (N-0x80). */ +int charset_unibyte_decoder[128]; @@ -270,9 +270,8 @@ return c; } -/* Convert the multibyte character C to unibyte 8-bit character based - on the current value of charset_unibyte. If dimension of - charset_unibyte is more than one, return (C & 0xFF). +/* Convert ASCII or 8-bit character C to unibyte. If C is none of + them, return (C & 0xFF). The argument REV_TBL is now ignored. It will be removed in the future. 
*/ @@ -282,14 +281,11 @@ int c; Lisp_Object rev_tbl; { - struct charset *charset; - unsigned c1; - + if (c < 0x80) + return c; if (CHAR_BYTE8_P (c)) return CHAR_TO_BYTE8 (c); - charset = CHARSET_FROM_ID (charset_unibyte); - c1 = ENCODE_CHAR (charset, c); - return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF); + return (c & 0xFF); } /* Like multibyte_char_to_unibyte, but return -1 if C is not supported @@ -302,11 +298,11 @@ struct charset *charset; unsigned c1; + if (c < 0x80) + return c; if (CHAR_BYTE8_P (c)) return CHAR_TO_BYTE8 (c); - charset = CHARSET_FROM_ID (charset_unibyte); - c1 = ENCODE_CHAR (charset, c); - return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : -1); + return -1; } DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0, @@ -337,10 +333,8 @@ c = XFASTINT (ch); if (c >= 0400) error ("Invalid unibyte character: %d", c); - charset = CHARSET_FROM_ID (charset_unibyte); - c = DECODE_CHAR (charset, c); - if (c < 0) - c = BYTE8_TO_CHAR (XFASTINT (ch)); + if (c >= 0x80) + c = BYTE8_TO_CHAR (c); return make_number (c); } Index: character.h =================================================================== RCS file: /cvsroot/emacs/emacs/src/character.h,v retrieving revision 1.15 diff -u -r1.15 character.h --- character.h 8 Jan 2009 03:15:27 -0000 1.15 +++ character.h 6 Jul 2009 06:42:31 -0000 @@ -87,11 +87,15 @@ #define unibyte_char_to_multibyte(c) \ ((c) < 256 ? unibyte_to_multibyte_table[(c)] : (c)) -/* Nth element is 1 iff unibyte char N can be mapped to a multibyte - char. */ -extern char unibyte_has_multibyte_table[256]; - -#define UNIBYTE_CHAR_HAS_MULTIBYTE_P(c) (unibyte_has_multibyte_table[(c)]) +/* Decoding table for 8-bit byte codes of the charset charset_unibyte. + Nth element is for the code (N-0x80). */ +extern int charset_unibyte_decoder[128]; + +/* Return a character correspoinding to the code BYTE of + charset_unibyte. BYTE must be a byte; i.e. less than 0x100. If + BYTE is not a valid code of charset_unibyte, return -1. 
*/ +#define DECODE_UNIBYTE(BYTE) \ + ((BYTE) < 0x80 ? (int) (BYTE) : charset_unibyte_decoder[(BYTE) - 0x80]) /* If C is not ASCII, make it unibyte. */ #define MAKE_CHAR_UNIBYTE(c) \ Index: charset.c =================================================================== RCS file: /cvsroot/emacs/emacs/src/charset.c,v retrieving revision 1.179 diff -u -r1.179 charset.c --- charset.c 9 Jun 2009 02:53:07 -0000 1.179 +++ charset.c 6 Jul 2009 06:42:32 -0000 @@ -2260,6 +2260,7 @@ Vcharset_ordered_list = Fnconc (2, arglist); charset_ordered_list_tick++; + charset_unibyte = -1; for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil; CONSP (old_list); old_list = XCDR (old_list)) { @@ -2267,9 +2268,25 @@ list_2022 = Fcons (XCAR (old_list), list_2022); if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list))) list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule); + if (charset_unibyte < 0) + { + struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list))); + + if (CHARSET_DIMENSION (charset) == 1 + && CHARSET_ASCII_COMPATIBLE_P (charset) + && CHARSET_MAX_CHAR (charset) >= 0x80) + charset_unibyte = CHARSET_ID (charset); + } } Viso_2022_charset_list = Fnreverse (list_2022); Vemacs_mule_charset_list = Fnreverse (list_emacs_mule); + if (charset_unibyte < 0) + charset_unibyte = charset_iso_8859_1; + { + struct charset *charset = CHARSET_FROM_ID (charset_unibyte); + for (i = 128; i < 256; i++) + charset_unibyte_decoder[i - 128] = DECODE_CHAR (charset, i); + } return Qnil; } @@ -2328,6 +2345,10 @@ unibyte_to_multibyte_table[i] = i; for (; i < 256; i++) unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i); + for (i = 0; i < 32; i++) + charset_unibyte_decoder[i] = -1; + for (; i < 128; i++) + charset_unibyte_decoder[i] = 128 + i; } #ifdef emacs @@ -2429,6 +2450,7 @@ = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00", 128, 255, -1, 0, -1, 0, 1, MAX_5_BYTE_CHAR + 1); + charset_unibyte = charset_iso_8859_1; } #endif /* emacs */ Index: xdisp.c 
=================================================================== RCS file: /cvsroot/emacs/emacs/src/xdisp.c,v retrieving revision 1.1288 diff -u -r1.1288 xdisp.c --- xdisp.c 18 Jun 2009 09:49:07 -0000 1.1288 +++ xdisp.c 6 Jul 2009 06:42:34 -0000 @@ -5743,7 +5743,7 @@ || it->c == 0xAD /* SOFT HYPHEN */))) : (it->c >= 127 && (! unibyte_display_via_language_environment - || (UNIBYTE_CHAR_HAS_MULTIBYTE_P (it->c))))))) + || (DECODE_UNIBYTE (it->c) <= 0xA0)))))) { /* IT->c is a control character which must be displayed either as '\003' or as `^C' where the '\\' and '^' @@ -21196,9 +21196,8 @@ { if (SINGLE_BYTE_CHAR_P (it->c) && unibyte_display_via_language_environment) - it->char_to_display = unibyte_char_to_multibyte (it->c); - if (! SINGLE_BYTE_CHAR_P (it->char_to_display)) { + it->char_to_display = DECODE_UNIBYTE (it->c); it->multibyte_p = 1; it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display, -1, Qnil);
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.