GNU bug report logs - #28339
25.2; Emacs shows ZWNJ character (Zero Width non-Joiner) as Space

Previous Next

Package: emacs;

Reported by: Nima Aryan <nimawebgard <at> gmail.com>

Date: Sun, 3 Sep 2017 16:41:01 UTC

Severity: normal

Found in version 25.2

Done: Lars Ingebrigtsen <larsi <at> gnus.org>

Bug is archived. No further changes may be made.

Full log


View this message in rfc822 format

From: handa <handa <at> gnu.org>
To: Nima Aryan <nimawebgard <at> gmail.com>
Cc: eliz <at> gnu.org, b.riefenstahl <at> turtle-trading.net, 28339 <at> debbugs.gnu.org
Subject: bug#28339: 25.2; Emacs shows ZWNJ character (Zero Width non-Joiner) as Space
Date: Fri, 06 Oct 2017 19:05:41 +0900
In article <CALp2H_2MWgjoEEm6Rp5+5uOdMk-RbFWzaCrweo=pbdzAaq8btA <at> mail.gmail.com>, Nima Aryan <nimawebgard <at> gmail.com> writes:

> As a user I prefer absorb mode by default but some times thin-space (and
> not simple space) might be a good option to consider.

Attached patch introduces a customizable variable
arabic-shaper-ZWNJ-handling.  Shall I install it?

---
K. Handa
handa <at> gnu.org

------------------------------------------------------------
diff --git a/lisp/composite.el b/lisp/composite.el
index ab39e08..72b0ffc 100644
--- a/lisp/composite.el
+++ b/lisp/composite.el
@@ -442,8 +442,10 @@ lglyph-set-width
 (defsubst lglyph-set-adjustment (glyph &optional xoff yoff wadjust)
   (aset glyph 9 (vector (or xoff 0) (or yoff 0) (or wadjust 0))))
 
+;; Return a shallow copy of GLYPH.
 (defsubst lglyph-copy (glyph) (copy-sequence glyph))
 
+;; Insert GLYPH at the index IDX of GSTRING.
 (defun lgstring-insert-glyph (gstring idx glyph)
   (let ((nglyphs (lgstring-glyph-len gstring))
 	(i idx))
@@ -459,6 +461,18 @@ lgstring-insert-glyph
     (lgstring-set-glyph gstring i glyph)
     gstring))
 
+;; Return a copy of GSTRING with the glyph at IDX removed and the ID reset to nil.
+(defun lgstring-remove-glyph (gstring idx)
+  (setq gstring (copy-sequence gstring)) ; edit a shallow copy, not the caller's vector
+  (lgstring-set-id gstring nil)
+  (let ((len (length gstring)))
+    (setq idx (+ idx 3)) ; presumably glyph N lives in slot N+2, so this is the successor -- TODO confirm
+    (while (< idx len)   ; shift every later slot one position to the left
+      (aset gstring (1- idx) (aref gstring idx))
+      (setq idx (1+ idx)))
+    (aset gstring (1- len) nil)) ; the vacated last slot becomes nil
+  gstring)
+
 (defun compose-glyph-string (gstring from to)
   (let ((glyph (lgstring-glyph gstring from))
 	from-pos to-pos)
diff --git a/lisp/language/misc-lang.el b/lisp/language/misc-lang.el
index 2843c7c..4e10227 100644
--- a/lisp/language/misc-lang.el
+++ b/lisp/language/misc-lang.el
@@ -75,12 +75,72 @@ 'cp1256
 	    (sample-text . "Persian	فارسی")
 	    (documentation . "Bidirectional editing is supported.")))
 
+(defcustom arabic-shaper-ZWNJ-handling nil
+  "How to handle ZWNJ in Arabic text rendering.
+This variable controls the way to handle a glyph for ZWNJ
+returned by the underlying shaping engine.
+
+The default value is nil, which means that the ZWNJ glyph is
+displayed as is.
+
+If the value is `absorb', ZWNJ is absorbed into the previous
+grapheme cluster, and not displayed.
+
+If the value is `as-space', the glyph is displayed as a
+thin (i.e. 1-dot width) space.
+
+Customizing the value takes effect when you start Emacs next time."
+  :group 'mule
+  :version "27.1"
+  :type '(choice
+          (const :tag "default" nil)
+          (const :tag "as space" as-space)
+          (const :tag "absorb" absorb)))
+
+(defvar arabic-shape-log nil) ; Debug log: `arabic-shape-gstring' pushes the setting, removed glyphs and caught errors.
+
+(defun arabic-shape-gstring (gstring)
+  "Shape GSTRING with `font-shape-gstring', then post-process ZWNJ glyphs.
+Each ZWNJ (U+200C) glyph after the first glyph is handled as
+`arabic-shaper-ZWNJ-handling' says.  Return the glyph-string."
+  (setq gstring (font-shape-gstring gstring))
+  (push arabic-shaper-ZWNJ-handling arabic-shape-log) ; NOTE(review): debug log, never trimmed here
+  (condition-case err
+      (when arabic-shaper-ZWNJ-handling
+        (let ((font (lgstring-font gstring))
+              (len (lgstring-glyph-len gstring))
+              (i 1) (modified nil)) ; start at 1: `absorb' needs a previous glyph
+          (while (< i len)
+            (let ((glyph (lgstring-glyph gstring i)))
+              (when (and glyph (eq (lglyph-char glyph) #x200c)) ; skip empty (nil) slots
+                (cond
+                 ((eq arabic-shaper-ZWNJ-handling 'as-space)
+                  (if (> (- (lglyph-rbearing glyph) (lglyph-lbearing glyph)) 0)
+                      (let ((space-glyph (aref (font-get-glyphs font 0 1 " ") 0)))
+                        (when space-glyph
+                          (lglyph-set-code glyph (aref space-glyph 3))
+                          (lglyph-set-width glyph (aref space-glyph 4)))))
+                  (lglyph-set-adjustment glyph 0 0 1)
+                  (setq modified t))
+                 ((eq arabic-shaper-ZWNJ-handling 'absorb)
+                  (let ((prev (lgstring-glyph gstring (1- i))))
+                    (lglyph-set-from-to prev (lglyph-from prev) (lglyph-to glyph))
+                    (push (cons "remove" (lgstring-glyph gstring i))
+                          arabic-shape-log)
+                    (setq gstring (lgstring-remove-glyph gstring i) ; next glyph slides into slot I,
+                          len (1- len) i (1- i) modified t)))))) ; so step I back to re-examine it
+            (setq i (1+ i)))
+          (when modified (lgstring-set-id gstring nil))))
+    (error (push err arabic-shape-log))) ; best effort: log the error, still return GSTRING
+  gstring)
+
 (set-char-table-range
  composition-function-table
  '(#x600 . #x74F)
- (list (vector "[\u0600-\u074F\u200C\u200D]+" 0 'font-shape-gstring)
-       (vector "[\u200C\u200D][\u0600-\u074F\u200C\u200D]+"
-               1 'font-shape-gstring)))
+ (list (vector "[\u0600-\u074F\u200C\u200D]+" 0
+               'arabic-shape-gstring) ; was `font-shape-gstring'; the wrapper still calls it first
+       (vector "[\u200C\u200D][\u0600-\u074F\u200C\u200D]+" 1
+               'arabic-shape-gstring))) ; same wrapper for runs that begin with U+200C or U+200D
 
 (provide 'misc-lang)
 






This bug report was last modified 4 years and 260 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.