On 2022-11-25 01:57, Pierre Langlois wrote: > It appears this email is making it through, so here it is as an > attachment: > > From 4a3c1fff8460a03bfb7c1aada9863205cd6f22fd Mon Sep 17 00:00:00 2001 > From: Pierre Langlois > Date: Tue, 29 Mar 2022 20:13:34 +0100 > Subject: [PATCH v7 06/32] build-system: Add tree-sitter-build-system. > > * guix/build-system/tree-sitter.scm: New module. > * guix/build/tree-sitter-build-system.scm: Likewise. > * Makefile.am (MODULES): Add them. > * doc/guix.texi: Document it. > --- > Makefile.am | 2 + > doc/guix.texi | 21 ++- > guix/build-system/tree-sitter.scm | 190 ++++++++++++++++++++++++ > guix/build/tree-sitter-build-system.scm | 153 +++++++++++++++++++ > 4 files changed, 365 insertions(+), 1 deletion(-) > create mode 100644 guix/build-system/tree-sitter.scm > create mode 100644 guix/build/tree-sitter-build-system.scm > > diff --git a/Makefile.am b/Makefile.am > index c3af23b68e..a16c4fcd7e 100644 > --- a/Makefile.am > +++ b/Makefile.am > @@ -178,6 +178,7 @@ MODULES = \ > guix/build-system/ruby.scm \ > guix/build-system/scons.scm \ > guix/build-system/texlive.scm \ > + guix/build-system/tree-sitter.scm \ > guix/build-system/trivial.scm \ > guix/ftp-client.scm \ > guix/http-client.scm \ > @@ -234,6 +235,7 @@ MODULES = \ > guix/build/ruby-build-system.scm \ > guix/build/scons-build-system.scm \ > guix/build/texlive-build-system.scm \ > + guix/build/tree-sitter-build-system.scm \ > guix/build/waf-build-system.scm \ > guix/build/haskell-build-system.scm \ > guix/build/julia-build-system.scm \ > diff --git a/doc/guix.texi b/doc/guix.texi > index e547d469f4..4e997f7176 100644 > --- a/doc/guix.texi > +++ b/doc/guix.texi > @@ -79,7 +79,7 @@ Copyright @copyright{} 2020 Jack Hill@* > Copyright @copyright{} 2020 Naga Malleswari@* > Copyright @copyright{} 2020, 2021 Brice Waegeneire@* > Copyright @copyright{} 2020 R Veera Kumar@* > -Copyright @copyright{} 2020, 2021 Pierre Langlois@* > +Copyright @copyright{} 2020, 2021, 2022 Pierre 
Langlois@* > Copyright @copyright{} 2020 pinoaffe@* > Copyright @copyright{} 2020 André Batista@* > Copyright @copyright{} 2020, 2021 Alexandru-Sergiu Marton@* > @@ -9732,6 +9732,25 @@ be specified with the @code{#:node} parameter which defaults to > @code{node}. > @end defvr > > +@defvr {Scheme Variable} tree-sitter-build-system > + > +This variable is exported by @code{(guix build-system tree-sitter)}. It > +implements procedures to compile grammars for the > +@url{https://tree-sitter.github.io/tree-sitter/, Tree-sitter} parsing > +library. It essentially runs @code{tree-sitter generate} to translate > +@code{grammar.js} grammars to JSON and then to C. Which it then > +compiles to native code. > + > +Tree-sitter packages may support multiple grammars, so this build system > +supports a @code{#:grammar-directories} keyword to specify a list of > +locations where a @code{grammar.js} file may be found. > + > +Grammars sometimes depend on each other, such as C++ depending on C and > +TypeScript depending on JavaScript. You may use inputs to declare such > +dependencies. > + > +@end defvr > + > Lastly, for packages that do not need anything as sophisticated, a > ``trivial'' build system is provided. It is trivial in the sense that > it provides basically no support: it does not pull any implicit inputs, > diff --git a/guix/build-system/tree-sitter.scm b/guix/build-system/tree-sitter.scm > new file mode 100644 > index 0000000000..aeb96e3ef5 > --- /dev/null > +++ b/guix/build-system/tree-sitter.scm > @@ -0,0 +1,190 @@ > +;;; GNU Guix --- Functional package management for GNU > +;;; Copyright © 2022 Pierre Langlois > +;;; > +;;; This file is part of GNU Guix. > +;;; > +;;; GNU Guix is free software; you can redistribute it and/or modify it > +;;; under the terms of the GNU General Public License as published by > +;;; the Free Software Foundation; either version 3 of the License, or (at > +;;; your option) any later version. 
> +;;; > +;;; GNU Guix is distributed in the hope that it will be useful, but > +;;; WITHOUT ANY WARRANTY; without even the implied warranty of > +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +;;; GNU General Public License for more details. > +;;; > +;;; You should have received a copy of the GNU General Public License > +;;; along with GNU Guix. If not, see . > + > +(define-module (guix build-system tree-sitter) > + #:use-module (guix store) > + #:use-module (guix utils) > + #:use-module (guix packages) > + #:use-module (guix gexp) > + #:use-module (guix monads) > + #:use-module (guix search-paths) > + #:use-module (guix build-system) > + #:use-module (guix build-system gnu) > + #:use-module (guix build-system node) > + #:use-module (ice-9 match) > + #:export (%tree-sitter-build-system-modules > + tree-sitter-build > + tree-sitter-build-system)) > + > +(define %tree-sitter-build-system-modules > + ;; Build-side modules imported by default. > + `((guix build tree-sitter-build-system) > + ,@%node-build-system-modules)) > + > +(define* (lower name > + #:key source inputs native-inputs outputs system target > + #:allow-other-keys > + #:rest arguments) > + "Return a bag for NAME from the given arguments." > + (define private-keywords > + `(#:inputs #:native-inputs #:outputs ,@(if target > + '() > + '(#:target)))) > + (define node > + (module-ref (resolve-interface '(gnu packages node)) > + 'node-lts)) > + (define tree-sitter > + (module-ref (resolve-interface '(gnu packages tree-sitter)) > + 'tree-sitter)) > + (define tree-sitter-cli > + (module-ref (resolve-interface '(gnu packages tree-sitter)) > + 'tree-sitter-cli)) > + ;; Grammars depend on each other via JS modules, which we package into a > + ;; dedicated js output. 
> + (define grammar-inputs > + (map (match-lambda > + ((name package) > + `(,name ,package "js"))) > + inputs)) > + (bag > + (name name) > + (system system) (target target) > + (build-inputs `(,@(if source > + `(("source" ,source)) > + '()) > + ("node" ,node) > + ("tree-sitter-cli" ,tree-sitter-cli) > + ,@native-inputs > + ,@(if target '() grammar-inputs) > + ;; Keep the standard inputs of 'gnu-build-system'. > + ,@(if target > + (standard-cross-packages target 'host) > + '()) > + ,@(standard-packages))) > + (host-inputs `(("tree-sitter" ,tree-sitter) > + ,@(if target grammar-inputs '()))) > + ;; Keep the standard inputs of 'gnu-build-system'. > + (target-inputs (if target > + (standard-cross-packages target 'target) > + '())) > + ;; XXX: this is a hack to get around issue #41569. > + (outputs (match outputs > + (("out") (cons "js" outputs)) > + (_ outputs))) > + (build (if target tree-sitter-cross-build tree-sitter-build)) > + (arguments (strip-keyword-arguments private-keywords arguments)))) > + > +(define* (tree-sitter-build name inputs > + #:key > + source > + (phases '%standard-phases) > + (grammar-directories '(".")) > + (tests? #t) > + (outputs '("out" "js")) > + (search-paths '()) > + (system (%current-system)) > + (guile #f) > + (imported-modules %tree-sitter-build-system-modules) > + (modules '((guix build utils) > + (guix build tree-sitter-build-system)))) > + (define builder > + (with-imported-modules imported-modules > + #~(begin > + (use-modules #$@(sexp->gexp modules)) > + (tree-sitter-build #:name #$name > + #:source #+source > + #:system #$system > + #:phases #$phases > + #:tests? #$tests? > + #:grammar-directories '#$grammar-directories > + #:outputs #$(outputs->gexp outputs) > + #:search-paths '#$(sexp->gexp > + (map search-path-specification->sexp > + search-paths)) > + #:inputs #$(input-tuples->gexp inputs))))) > + > + (mlet %store-monad ((guile (package->derivation (or guile (default-guile)) > + system #:graft? 
#f))) > + (gexp->derivation name builder > + #:system system > + #:guile-for-build guile))) > + > +(define* (tree-sitter-cross-build name > + #:key > + target > + build-inputs target-inputs host-inputs > + guile source > + (phases '%standard-phases) > + (grammar-directories '(".")) > + (tests? #t) > + (outputs '("out" "js")) > + (search-paths '()) > + (native-search-paths '()) > + (system (%current-system)) > + (build (nix-system->gnu-triplet system)) > + (imported-modules %tree-sitter-build-system-modules) > + (modules '((guix build utils) > + (guix build tree-sitter-build-system)))) > + (define builder > + (with-imported-modules imported-modules > + #~(begin > + (use-modules #$@(sexp->gexp modules)) > + > + (define %build-host-inputs > + #+(input-tuples->gexp build-inputs)) > + > + (define %build-target-inputs > + (append #$(input-tuples->gexp host-inputs) > + #+(input-tuples->gexp target-inputs))) > + > + (define %build-inputs > + (append %build-host-inputs %build-target-inputs)) > + > + (tree-sitter-build #:name #$name > + #:source #+source > + #:system #$system > + #:build #$build > + #:target #$target > + #:phases #$phases > + #:tests? #$tests? > + #:grammar-directories '#$grammar-directories > + #:outputs #$(outputs->gexp outputs) > + #:inputs %build-target-inputs > + #:native-inputs %build-host-inputs > + #:search-paths '#$(sexp->gexp > + (map search-path-specification->sexp > + search-paths)) > + #:native-search-paths '#$(sexp->gexp > + (map > + search-path-specification->sexp > + native-search-paths)))))) > + > + (mlet %store-monad ((guile (package->derivation (or guile (default-guile)) > + system #:graft? 
#f))) > + (gexp->derivation name builder > + #:system system > + #:target target > + #:guile-for-build guile))) > + > +(define tree-sitter-build-system > + (build-system > + (name 'tree-sitter) > + (description "The Tree-sitter grammar build system") > + (lower lower))) > + > +;;; tree-sitter.scm ends here > diff --git a/guix/build/tree-sitter-build-system.scm b/guix/build/tree-sitter-build-system.scm > new file mode 100644 > index 0000000000..574b0f2a1c > --- /dev/null > +++ b/guix/build/tree-sitter-build-system.scm > @@ -0,0 +1,153 @@ > +;;; GNU Guix --- Functional package management for GNU > +;;; Copyright © 2022 Pierre Langlois > +;;; > +;;; This file is part of GNU Guix. > +;;; > +;;; GNU Guix is free software; you can redistribute it and/or modify it > +;;; under the terms of the GNU General Public License as published by > +;;; the Free Software Foundation; either version 3 of the License, or (at > +;;; your option) any later version. > +;;; > +;;; GNU Guix is distributed in the hope that it will be useful, but > +;;; WITHOUT ANY WARRANTY; without even the implied warranty of > +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +;;; GNU General Public License for more details. > +;;; > +;;; You should have received a copy of the GNU General Public License > +;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. > + > +(define-module (guix build tree-sitter-build-system) > + #:use-module ((guix build node-build-system) #:prefix node:) > + #:use-module (guix build json) > + #:use-module (guix build utils) > + #:use-module (ice-9 match) > + #:use-module (ice-9 regex) > + #:use-module (srfi srfi-1) > + #:export (%standard-phases > + tree-sitter-build)) > + > +;; Commentary: > +;; > +;; Build procedures for tree-sitter grammar packages. This is the > +;; builder-side code, which builds on top of the node build-system. > +;; > +;; Tree-sitter grammars are written in JavaScript and compiled to a native > +;; shared object. 
The `tree-sitter generate' command invokes `node' in order > +;; to evaluate the grammar.js into a grammar.json file, which is then > +;; translated into C code. We then compile the C code ourselves. Packages > +;; also sometimes add extra manually written C/C++ code. > +;; > +;; In order to support grammars depending on each other, such as C and C++, > +;; JavaScript and TypeScript, this build-system installs the source of the > +;; node module in a dedicated "js" output. > +;; > +;; Code: > + > +(define* (patch-dependencies #:key inputs #:allow-other-keys) > + "Rewrite dependencies in 'package.json'. We remove all runtime dependencies > +and replace development dependencies with tree-sitter grammar node modules." > + > + (define (rewrite package.json) > + (map (match-lambda > + (("dependencies" @ . _) > + '("dependencies" @)) > + (("devDependencies" @ . _) > + `("devDependencies" @ > + ,@(filter-map (match-lambda > + ((key . directory) > + (let ((node-module > + (string-append directory > + "/lib/node_modules/" > + key))) > + (and (directory-exists? node-module) > + `(,key . ,node-module))))) > + (alist-delete "node" inputs)))) > + (other other)) > + package.json)) > + > + (node:with-atomic-json-file-replacement "package.json" > + (match-lambda > + (('@ . package.json) > + (cons '@ (rewrite package.json)))))) > + > +;; FIXME: The node build-system's configure phase does not support > +;; cross-compiling so we re-define it. > +(define* (configure #:key native-inputs inputs #:allow-other-keys) > + (invoke (search-input-file (or native-inputs inputs) "/bin/npm") > + "--offline" "--ignore-scripts" "install")) > + > +(define* (build #:key grammar-directories #:allow-other-keys) > + (for-each (lambda (dir) > + (with-directory-excursion dir > + ;; Avoid generating binding code for other languages, we do > + ;; not support this use-case yet and it relies on running > + ;; `node-gyp' to build native addons. 
> + (invoke "tree-sitter" "generate" "--no-bindings"))) > + grammar-directories)) > + > +(define* (check #:key grammar-directories tests? #:allow-other-keys) > + (when tests? > + (for-each (lambda (dir) > + (with-directory-excursion dir > + (invoke "tree-sitter" "test"))) > + grammar-directories))) > + > +(define* (install #:key target grammar-directories outputs #:allow-other-keys) > + (let ((lib (string-append (assoc-ref outputs "out") > + "/lib/tree-sitter"))) > + (mkdir-p lib) > + (define (compile-language dir) > + (with-directory-excursion dir > + (let ((lang (assoc-ref (call-with-input-file "src/grammar.json" > + read-json) > + "name")) > + (source-file (lambda (path) > + (if (file-exists? path) > + path > + #f)))) > + (apply invoke > + `(,(if target > + (string-append target "-g++") > + "g++") > + "-shared" > + "-fPIC" > + "-fno-exceptions" > + "-O2" > + "-g" > + "-o" ,(string-append lib "/" lang ".so") > + ;; An additional `scanner.{c,cc}' file is sometimes > + ;; provided. > + ,@(cond > + ((source-file "src/scanner.c") > + => (lambda (file) (list "-xc" "-std=c99" file))) > + ((source-file "src/scanner.cc") > + => (lambda (file) (list file))) > + (else '())) > + "-xc" "src/parser.c"))))) > + (for-each compile-language grammar-directories))) > + > +(define* (install-js #:key native-inputs inputs outputs #:allow-other-keys) > + (invoke (search-input-file (or native-inputs inputs) "/bin/npm") > + "--prefix" (assoc-ref outputs "js") > + "--global" > + "--offline" > + "--loglevel" "info" > + "--production" > + ;; Skip scripts to prevent building bindings via GYP. 
> + "--ignore-scripts" > + "install" "../package.tgz")) > + > +(define %standard-phases > + (modify-phases node:%standard-phases > + (replace 'patch-dependencies patch-dependencies) > + (replace 'configure configure) > + (replace 'build build) > + (replace 'check check) > + (replace 'install install) > + (add-after 'install 'install-js install-js))) > + > +(define* (tree-sitter-build #:key inputs (phases %standard-phases) > + #:allow-other-keys #:rest args) > + (apply node:node-build #:inputs inputs #:phases phases args)) > + > +;;; tree-sitter-build-system.scm ends here Applied, slightly reformatted, and pushed as dbd4d2d0707b486f1e2c8659e94e1d3b15e4351e. I also migrated the tree-sitter-grammar function to this build system. I am still getting a feel for packaging grammars both with the tree-sitter-grammar helper and as plain packages, and trying to understand how good or bad the template-function idea is in this case and whether we want to keep it. -- Best regards, Andrew Tropin