GNU bug report logs - #23814
24.5; bug of hz coding-system

Previous Next

Package: emacs;

Reported by: ynyaaa <at> gmail.com

Date: Tue, 21 Jun 2016 12:23:02 UTC

Severity: normal

Found in version 24.5

Fixed in version 26.1

Done: Glenn Morris <rgm <at> gnu.org>

Bug is archived. No further changes may be made.

Full log


Message #47 received at 23814 <at> debbugs.gnu.org (full text, mbox):

From: handa <handa <at> gnu.org>
To: ynyaaa <at> gmail.com
Cc: eliz <at> gnu.org, 23814 <at> debbugs.gnu.org
Subject: Re: bug#23814: 24.5; bug of hz coding-system
Date: Wed, 17 Aug 2016 23:43:13 +0900
In article <87oa4rdhvq.fsf <at> gmail.com>, ynyaaa <at> gmail.com writes:

> Hi, I tried new china-util.el. It works very well.

Thank you for testing it.

> I prefer 7bit encoding to use only 7bit data, too.
> As for elisp, "\u12345" is treated as "\u1234\ 5".

Ah, ok, I changed it to encode characters not in the BMP as \UXXXXXXXX.

I've just committed the attached change.

---
K. Handa
handa <at> gnu.org

2016-08-17  handa  <handa <at> gnu.org>

	* lisp/language/china-util.el (decode-hz-region): Pay
	attention to "~~}" sequence at the end of Chinese character
	range.
	(hz-category-table): New variable.
	(encode-hz-region): Convert non-encodable characters to
	\u... and \U...  Preserve ESC on encoding.  Put
	`chinese-gb2312' `charset' text property in advance to force
	iso-2022-encoding to select chinese-gb2312 designation.

diff --git a/lisp/language/china-util.el b/lisp/language/china-util.el
index e531640..6505fb8 100644
--- a/lisp/language/china-util.el
+++ b/lisp/language/china-util.el
@@ -88,43 +88,34 @@ decode-hz-region
       (let (pos ch)
 	(narrow-to-region beg end)
 
-	;; We, at first, convert HZ/ZW to `euc-china',
+	;; We, at first, convert HZ/ZW to `iso-2022-7bit',
 	;; then decode it.
 
-	;; "~\n" -> "\n", "~~" -> "~"
+	;; "~\n" -> "", "~~" -> "~"
 	(goto-char (point-min))
 	(while (search-forward "~" nil t)
 	  (setq ch (following-char))
-	  (if (or (= ch ?\n) (= ch ?~)) (delete-char -1)))
+	  (cond ((= ch ?{)
+		 (delete-region (1- (point)) (1+ (point)))
+		 (setq pos (point))
+		 (insert iso2022-gb-designation)
+		 (if (looking-at "\\([!-}][!-~]\\)*")
+		     (goto-char (match-end 0)))
+		 (if (looking-at hz-ascii-designation)
+		     (delete-region (match-beginning 0) (match-end 0)))
+		 (insert iso2022-ascii-designation)
+		 (decode-coding-region pos (point) 'iso-2022-7bit))
+
+		((= ch ?~)
+		 (delete-char 1))
+
+		((and (= ch ?\n)
+		      decode-hz-line-continuation)
+		 (delete-region (1- (point)) (1+ (point))))
+
+		(t
+		 (forward-char 1)))))
 
-	;; "^zW...\n" -> Chinese GB2312
-	;; "~{...~}"  -> Chinese GB2312
-	(goto-char (point-min))
-	(setq beg nil)
-	(while (re-search-forward hz/zw-start-gb nil t)
-	  (setq pos (match-beginning 0)
-		ch (char-after pos))
-	  ;; Record the first position to start conversion.
-	  (or beg (setq beg pos))
-	  (end-of-line)
-	  (setq end (point))
-	  (if (>= ch 128)		; 8bit GB2312
-	      nil
-	    (goto-char pos)
-	    (delete-char 2)
-	    (setq end (- end 2))
-	    (if (= ch ?z)			; ZW -> euc-china
-		(progn
-		  (translate-region (point) end hz-set-msb-table)
-		  (goto-char end))
-	      (if (search-forward hz-ascii-designation
-				  (if decode-hz-line-continuation nil end)
-				  t)
-		  (delete-char -2))
-	      (setq end (point))
-	      (translate-region pos (point) hz-set-msb-table))))
-	(if beg
-	    (decode-coding-region beg end 'euc-china)))
       (- (point-max) (point-min)))))
 
 ;;;###autoload
@@ -133,33 +124,57 @@ decode-hz-buffer
   (interactive)
   (decode-hz-region (point-min) (point-max)))
 
+(defvar hz-category-table nil)
+
 ;;;###autoload
 (defun encode-hz-region (beg end)
   "Encode the text in the current region to HZ.
 Return the length of resulting text."
   (interactive "r")
+  (unless hz-category-table
+    (setq hz-category-table (make-category-table))
+    (with-category-table hz-category-table
+      (define-category ?c "hz encodable")
+      (map-charset-chars #'modify-category-entry 'ascii ?c)
+      (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)))
   (save-excursion
     (save-restriction
       (narrow-to-region beg end)
+      (with-category-table hz-category-table
+	;; ~ -> ~~
+	(goto-char (point-min))
+	(while (search-forward "~" nil t) (insert ?~))
+
+	;; ESC -> ESC ESC
+	(goto-char (point-min))
+	(while (search-forward "\e" nil t) (insert ?\e))
 
-      ;; "~" -> "~~"
-      (goto-char (point-min))
-      (while (search-forward "~" nil t)	(insert ?~))
-
-      ;; Chinese GB2312 -> "~{...~}"
-      (goto-char (point-min))
-      (if (re-search-forward "\\cc" nil t)
-	  (let (pos)
-	    (goto-char (setq pos (match-beginning 0)))
-	    (encode-coding-region pos (point-max) 'iso-2022-7bit)
-	    (goto-char pos)
-	    (while (search-forward iso2022-gb-designation nil t)
-	      (delete-char -3)
-	      (insert hz-gb-designation))
-	    (goto-char pos)
-	    (while (search-forward iso2022-ascii-designation nil t)
-	      (delete-char -3)
-	      (insert hz-ascii-designation))))
+	;; Non-ASCII-GB2312 -> \uXXXX
+	(goto-char (point-min))
+	(while (re-search-forward "\\Cc" nil t)
+	  (let ((ch (preceding-char)))
+	    (delete-char -1)
+	    (insert (format (if (< ch #x10000) "\\u%04X" "\\U%08X") ch))))
+
+	;; Prefer chinese-gb2312 for Chinese characters.
+	(put-text-property (point-min) (point-max) 'charset 'chinese-gb2312)
+	(encode-coding-region (point-min) (point-max) 'iso-2022-7bit)
+
+	;; ESC $ A ... ESC ( B  -> ~{ ... ~}
+	;; ESC ESC -> ESC
+	(goto-char (point-min))
+	(while (search-forward "\e" nil t)
+	  (if (= (following-char) ?\e)
+	      ;; ESC ESC -> ESC
+	      (delete-char 1)
+	    (forward-char -1)
+	    (if (looking-at iso2022-gb-designation)
+		(progn
+		  (delete-region (match-beginning 0) (match-end 0))
+		  (insert hz-gb-designation)
+		  (search-forward iso2022-ascii-designation nil 'move)
+		  (delete-region (match-beginning 0) (match-end 0))
+		  (insert hz-ascii-designation))))))
       (- (point-max) (point-min)))))
 
 ;;;###autoload




This bug report was last modified 8 years and 85 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.