Package: emacs;
Reported by: Juri Linkov <juri <at> linkov.net>
Date: Tue, 25 Mar 2025 18:44:02 UTC
Severity: normal
Fixed in version 31.0.50
Done: Juri Linkov <juri <at> linkov.net>
Bug is archived. No further changes may be made.
View this message in rfc822 format
From: Yuan Fu <casouri <at> gmail.com> To: Juri Linkov <juri <at> linkov.net> Cc: 77256 <at> debbugs.gnu.org, Vincenzo Pupillo <v.pupillo <at> gmail.com> Subject: bug#77256: Treesit language-at-point Date: Mon, 31 Mar 2025 17:27:20 -0700
> On Mar 31, 2025, at 9:53 AM, Juri Linkov <juri <at> linkov.net> wrote: > >>> Do we still need such complicated functions as >>> mhtml-ts-mode--language-at-point, js--treesit-language-at-point, etc. >>> that duplicate the rules from 'treesit-range-rules' >>> when now the default language-at-point function could be implemented >>> just as >>> >>> (treesit-parser-language >>> (or (seq-some (lambda (o) (overlay-get o 'treesit-parser)) >>> (overlays-at (point) t)) >>> treesit-primary-parser)) >> >> Yeah, we can provide a default language-at-point function now that >> determines the “most relevant parser” by embed level. But we should keep >> treesit-language-at-point-function because a) it’s already in a release >> version and b) we want major modes to be able to customize what parser to >> pick at any given point. > > Ok, so here is a complete patch: Thanks for working on this! > > diff --git a/lisp/textmodes/mhtml-ts-mode.el b/lisp/textmodes/mhtml-ts-mode.el > index 22c0455a4ee..fdb286fc8c4 100644 > --- a/lisp/textmodes/mhtml-ts-mode.el > +++ b/lisp/textmodes/mhtml-ts-mode.el > @@ -221,21 +222,6 @@ mhtml-ts-mode-menu > "Menu bar for `mhtml-ts-mode'." > css-mode--menu) > > -;; To enable some basic treesiter functionality, you should define > -;; a function that recognizes which grammar is used at-point. > -;; This function should be assigned to `treesit-language-at-point-function' > -(defun mhtml-ts-mode--language-at-point (point) > - "Return the language at POINT assuming the point is within a HTML buffer." 
> - (let* ((node (treesit-node-at point 'html)) > - (parent (treesit-node-parent node)) > - (node-query (format "(%s (%s))" > - (treesit-node-type parent) > - (treesit-node-type node)))) > - (cond > - ((equal "(script_element (raw_text))" node-query) (js--treesit-language-at-point point)) > - ((equal "(style_element (raw_text))" node-query) 'css) > - (t 'html)))) > - > ;; Custom font-lock function that's used to apply color to css color > ;; The signature of the function should be conforming to signature > ;; QUERY-SPEC required by `treesit-font-lock-rules'. > @@ -448,7 +434,7 @@ mhtml-ts-mode-flymake-mhtml > > ;;;###autoload > (define-derived-mode mhtml-ts-mode html-ts-mode > - '("HTML+" (:eval (let ((lang (mhtml-ts-mode--language-at-point (point)))) > + '("HTML+" (:eval (let ((lang (treesit-language-at (point)))) > (cond ((eq lang 'html) "") > ((eq lang 'javascript) "JS") > ((eq lang 'css) "CSS"))))) > @@ -527,10 +520,6 @@ mhtml-ts-mode > (setq-local c-ts-common--comment-regexp > js--treesit-jsdoc-comment-regexp)) > > - > - ;; Many treesit functions need to know the language at-point. > - ;; So you should define such a function. > - (setq-local treesit-language-at-point-function #'mhtml-ts-mode--language-at-point) > (setq-local prettify-symbols-alist mhtml-ts-mode--prettify-symbols-alist) > > ;; Indent. > diff --git a/lisp/treesit.el b/lisp/treesit.el > index 54c29326df2..75c04912216 100644 > --- a/lisp/treesit.el > +++ b/lisp/treesit.el > @@ -185,15 +185,15 @@ treesit-language-at > This function assumes that parser ranges are up-to-date. It > returns the return value of `treesit-language-at-point-function' > if it's non-nil, otherwise it returns the language of the first > -parser in `treesit-parser-list', or nil if there is no parser. > +parser from `treesit-parsers-at', or the primary parser. 
If we handle the fallback case in treesit-language-at directly, rather than defining a separate default function, IMO we should describe the fallback behavior as the default. So something like “Return the language at POS. When there are multiple parsers that cover POS, determine the most relevant parser (hence language) by their embed level. If treesit-language-at-point-function is non-nil, return the return value of that function instead.” (Not saying we should use this exact docstring but to illustrate the point.) > > -In a multi-language buffer, make sure > -`treesit-language-at-point-function' is implemented! Otherwise > -`treesit-language-at' wouldn't return the correct result." > +In a multi-language buffer, optionally you can implement > +`treesit-language-at-point-function' to return more correct result." > (if treesit-language-at-point-function > (funcall treesit-language-at-point-function position) > - (when-let* ((parser (car (treesit-parser-list)))) > - (treesit-parser-language parser)))) > + (treesit-parser-language > + (or (car (treesit-parsers-at position)) > + treesit-primary-parser)))) > > ;;; Node API supplement > > @@ -247,8 +247,9 @@ treesit-node-at > (parser-or-lang > (let* ((local-parser (car (treesit-local-parsers-at > pos parser-or-lang))) > - (global-parser (car (treesit-parser-list > - nil parser-or-lang))) > + (global-parser (or (car (treesit-parsers-at > + pos parser-or-lang)) > + treesit-primary-parser)) > (parser (or local-parser global-parser))) > (when parser > (treesit-parser-root-node parser)))) > @@ -267,13 +268,10 @@ treesit-node-at > (local-parser > ;; Find the local parser with highest > ;; embed-level at point. 
> - (car (seq-sort-by #'treesit-parser-embed-level > - (lambda (a b) > - (> (or a 0) (or b 0))) > - (treesit-local-parsers-at > - pos lang)))) > - (global-parser (car (treesit-parser-list > - nil lang))) > + (car (treesit-local-parsers-at pos lang))) > + (global-parser (or (car (treesit-parsers-at > + pos lang)) > + treesit-primary-parser)) > (parser (or local-parser global-parser))) > (when parser > (treesit-parser-root-node parser)))))) > @@ -851,6 +849,38 @@ treesit--clip-ranges > if (<= start (car range) (cdr range) end) > collect range)) > > +(defun treesit-parsers-at (&optional pos language with-host local-only) > + "Return all the non-primary parsers at POS. I get why you used treesit-parsers-at rather than a more technically correct name like treesit-non-primary-parsers-at, or treesit-embedded-parsers-at. But this is confusing for not much benefit IMO. I suggest either use treesit-parsers-at, and add an optional argument exclude-primary; or use a more correct name and don’t include the primary parser. Also, instead of using LOCAL-ONLY, we might be more future-proof to use an ONLY parameter, and let user pass ‘local to mean local-only. This way we can add the option of returning only non-local non-primary parsers in the future, should the need arise (it already kinda does in treesit-node-at and treesit-simple-imenu, technically global-parser should be picked from non-local parsers). > + > +POS defaults to point. > +If LANGUAGE is non-nil, only return parsers for LANGUAGE. > + > +If WITH-HOST is non-nil, return a list of (PARSER . HOST-PARSER) > +instead. HOST-PARSER is the host parser which created the PARSER. > + > +If LOCAL-ONLY is non-nil, return only local parsers. > +Local parsers are those which only parse a limited region marked > +by an overlay with non-nil `treesit-parser' property." > + (let ((res nil)) > + ;; Refer to (ref:local-parser-overlay) for more explanation of local > + ;; parser overlays. 
> + (dolist (ov (overlays-at (or pos (point)))) > + (when-let* ((parser (overlay-get ov 'treesit-parser)) > + (host-parser (or (null with-host) > + (overlay-get ov 'treesit-host-parser))) > + (_ (or (null local-only) > + (overlay-get ov 'treesit-parser-local-p))) > + (_ (or (null language) > + (eq (treesit-parser-language parser) > + language)))) > + (push (if with-host (cons parser host-parser) parser) res))) > + (seq-sort-by (lambda (p) > + (treesit-parser-embed-level > + (or (car-safe p) p))) > + (lambda (a b) > + (> (or a 0) (or b 0))) > + res))) > + > (defun treesit-local-parsers-at (&optional pos language with-host) > "Return all the local parsers at POS. > > @@ -862,19 +892,7 @@ treesit-local-parsers-at > If WITH-HOST is non-nil, return a list of (PARSER . HOST-PARSER) > instead. HOST-PARSER is the host parser which created the local > PARSER." > - (let ((res nil)) > - ;; Refer to (ref:local-parser-overlay) for more explanation of local > - ;; parser overlays. > - (dolist (ov (overlays-at (or pos (point)))) > - (let ((parser (overlay-get ov 'treesit-parser)) > - (host-parser (overlay-get ov 'treesit-host-parser)) > - (local-p (overlay-get ov 'treesit-parser-local-p))) > - (when (and parser host-parser local-p > - (or (null language) > - (eq (treesit-parser-language parser) > - language))) > - (push (if with-host (cons parser host-parser) parser) res)))) > - (nreverse res))) > + (treesit-parsers-at pos language with-host t)) > > (defun treesit-local-parsers-on (&optional beg end language with-host) > "Return the list of local parsers that cover the region between BEG and END. 
> @@ -3135,9 +3159,7 @@ treesit-up-list > (setq parent (treesit-parent-until parent pred))) > > (unless parent > - (let ((parsers (seq-keep (lambda (o) > - (overlay-get o 'treesit-host-parser)) > - (overlays-at (point) t)))) > + (let ((parsers (mapcar #'cdr (treesit-parsers-at (point) nil t)))) > (while (and (not parent) parsers) > (setq parent (treesit-parent-until > (treesit-node-at (point) (car parsers)) pred) > @@ -3887,9 +3929,9 @@ treesit-simple-imenu > (lambda (entry) > (let* ((lang (car entry)) > (settings (cdr entry)) > - (global-parser (car (treesit-parser-list nil lang))) > - (local-parsers > - (treesit-parser-list nil lang 'embedded))) > + (global-parser (or (car (treesit-parsers-at nil lang)) > + treesit-primary-parser)) > + (local-parsers (treesit-local-parsers-at nil lang))) > (cons (treesit-language-display-name lang) > ;; No one says you can't have both global and local > ;; parsers for the same language. E.g., Rust uses > @@ -4029,9 +4074,7 @@ treesit-outline-level > (setq level (1+ level))) > > ;; Continue counting the host nodes. > - (dolist (parser (seq-keep (lambda (o) > - (overlay-get o 'treesit-host-parser)) > - (overlays-at (point) t))) > + (dolist (parser (mapcar #'cdr (treesit-parsers-at (point) nil t))) > (let* ((node (treesit-node-at (point) parser)) > (lang (treesit-parser-language parser)) > (pred (alist-get lang treesit-aggregated-outline-predicate))) We should also update manual sections that mention treesit-language-at. Yuan
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.