From b01435306a36e4e75671fbe7bacea351f89947d5 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Sun, 2 Nov 2025 16:16:50 -0800 Subject: [PATCH] Change tree-sitter query predicate names (bug#79687) The latest tree-sitter library throws a syntax error if the predicate names in a query don't end with a question mark. So we made the following change: :equal changed to :eq? :match changed to :match? :pred changed to :pred? Old names are transparently converted to new names when expanding patterns. :match predicate can now take the regexp and the node in any order: it'll figure out which is which automatically. This way it works with current Emacs convention (regexp first), as well as tree-sitter's match convention (regexp second). * doc/lispref/parsing.texi (Pattern Matching): Update manual to use new predicate names. * src/treesit.c: (Ftreesit_pattern_expand): (Ftreesit_query_expand): (treesit_predicate_match): (treesit_eval_predicates): (syms_of_treesit): Use new predicate names. * test/src/treesit-tests.el (treesit-query-api): Update test. --- doc/lispref/parsing.texi | 34 +++++++++------ etc/NEWS | 12 ++++++ src/treesit.c | 90 ++++++++++++++++++++------------------- test/src/treesit-tests.el | 6 +-- 4 files changed, 83 insertions(+), 59 deletions(-) diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 5734fcf8094..08f5c310a24 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -1473,7 +1473,7 @@ example, with the following pattern: @group ( (array :anchor (_) @@first (_) @@last :anchor) - (:equal @@first @@last) + (:eq? @@first @@last) ) @end group @end example @@ -1482,24 +1482,32 @@ example, with the following pattern: tree-sitter only matches arrays where the first element is equal to the last element. To attach a predicate to a pattern, we need to group them together. Currently there are three predicates: -@code{:equal}, @code{:match}, and @code{:pred}. +@code{:eq?}, @code{:match?}, and @code{:pred?}. 
-@deffn Predicate :equal arg1 arg2 +@deffn Predicate :eq? arg1 arg2 Matches if @var{arg1} is equal to @var{arg2}. Arguments can be either strings or capture names. Capture names represent the text that the -captured node spans in the buffer. +captured node spans in the buffer. Note that this is more like +@code{equal} in Elisp, but @code{eq?} is the convention used by +tree-sitter. Previously we supported the @code{:equal} predicate but +it's now considered deprecated. @end deffn -@deffn Predicate :match regexp capture-name +@deffn Predicate :match? capture-name regexp Matches if the text that @var{capture-name}'s node spans in the buffer matches regular expression @var{regexp}, given as a string literal. -Matching is case-sensitive. +Matching is case-sensitive. The ordering of the arguments doesn't +matter. Previously we supported the @code{:match} predicate but it's +now considered deprecated. @end deffn -@deffn Predicate :pred fn &rest nodes +@deffn Predicate :pred? fn &rest nodes Matches if function @var{fn} returns non-@code{nil} when passed each node in @var{nodes} as arguments. The function runs with the current -buffer set to the buffer of node being queried. +buffer set to the buffer of node being queried. Be very careful when +using this predicate, since it can be expensive when used in a tight +loop. Previously we supported the @code{:pred} predicate but it's now +considered deprecated. @end deffn Note that a predicate can only refer to capture names that appear in @@ -1554,9 +1562,9 @@ Anchor @code{:anchor} is written as @samp{.}. @item @samp{:+} is written as @samp{+}. @item -@code{:equal}, @code{:match} and @code{:pred} are written as -@code{#equal}, @code{#match} and @code{#pred}, respectively. -In general, predicates change their @samp{:} to @samp{#}. +@code{:eq?}, @code{:match?} and @code{:pred?} are written as +@code{#eq?}, @code{#match?} and @code{#pred?}, respectively. In +general, predicates change the @samp{:} to @samp{#}. 
@end itemize For example, @@ -1565,7 +1573,7 @@ For example, @group '(( (compound_expression :anchor (_) @@first (_) :* @@rest) - (:match "love" @@first) + (:match? "love" @@first) )) @end group @end example @@ -1577,7 +1585,7 @@ is written in string form as @group "( (compound_expression . (_) @@first (_)* @@rest) - (#match \"love\" @@first) + (#match? \"love\" @@first) )" @end group @end example diff --git a/etc/NEWS b/etc/NEWS index 63ea0b5a11f..d3eff6991dd 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1049,6 +1049,18 @@ Now 'treesit-explore-mode' (or 'treesit-explore') prompts for a parser rather than a language, and it is now possible to select a local parser at point to explore. ++++ +*** Tree-sitter query predicates :equal, :match, and :pred are deprecated +Use :eq?, :match?, :pred? instead. The change is because the newer +tree-sitter library requires query predicates to end with a question mark. +Emacs transparently converts :equal, :match and :pred to :eq?, +:match? and :pred?, respectively, so existing queries still work fine +with the latest tree-sitter library. Predicate :equal is changed to :eq? to +better follow tree-sitter’s convention. Also, the :match? predicate +can now take the regexp as either the first or second argument, so it +works with both the tree-sitter convention (regexp arg second) and the Emacs +convention (regexp arg first). 
+ ** Hideshow +++ diff --git a/src/treesit.c b/src/treesit.c index 69751b5ea10..3230d0a50a1 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -490,17 +490,17 @@ static Lisp_Object Vtreesit_str_dot; static Lisp_Object Vtreesit_str_question_mark; static Lisp_Object Vtreesit_str_star; static Lisp_Object Vtreesit_str_plus; -static Lisp_Object Vtreesit_str_pound_equal; -static Lisp_Object Vtreesit_str_pound_match; -static Lisp_Object Vtreesit_str_pound_pred; +static Lisp_Object Vtreesit_str_pound_eq_question_mark; +static Lisp_Object Vtreesit_str_pound_match_question_mark; +static Lisp_Object Vtreesit_str_pound_pred_question_mark; static Lisp_Object Vtreesit_str_open_bracket; static Lisp_Object Vtreesit_str_close_bracket; static Lisp_Object Vtreesit_str_open_paren; static Lisp_Object Vtreesit_str_close_paren; static Lisp_Object Vtreesit_str_space; -static Lisp_Object Vtreesit_str_equal; -static Lisp_Object Vtreesit_str_match; -static Lisp_Object Vtreesit_str_pred; +static Lisp_Object Vtreesit_str_eq_question_mark; +static Lisp_Object Vtreesit_str_match_question_mark; +static Lisp_Object Vtreesit_str_pred_question_mark; static Lisp_Object Vtreesit_str_empty; /* This is the limit on recursion levels for some tree-sitter @@ -3471,12 +3471,12 @@ See Info node `(elisp)Pattern Matching' for detailed explanation. 
*/) return Vtreesit_str_star; if (BASE_EQ (pattern, QCplus)) return Vtreesit_str_plus; - if (BASE_EQ (pattern, QCequal)) - return Vtreesit_str_pound_equal; - if (BASE_EQ (pattern, QCmatch)) - return Vtreesit_str_pound_match; - if (BASE_EQ (pattern, QCpred)) - return Vtreesit_str_pound_pred; + if (BASE_EQ (pattern, QCequal) || BASE_EQ (pattern, QCeq_q)) + return Vtreesit_str_pound_eq_question_mark; + if (BASE_EQ (pattern, QCmatch) || BASE_EQ (pattern, QCmatch_q)) + return Vtreesit_str_pound_match_question_mark; + if (BASE_EQ (pattern, QCpred) || BASE_EQ (pattern, QCpred_q)) + return Vtreesit_str_pound_pred_question_mark; Lisp_Object opening_delimeter = VECTORP (pattern) ? Vtreesit_str_open_bracket : Vtreesit_str_open_paren; @@ -3507,7 +3507,9 @@ A PATTERN in QUERY can be :* :+ :equal + :eq? :match + :match? (TYPE PATTERN...) [PATTERN...] FIELD-NAME: @@ -3670,7 +3672,7 @@ treesit_predicate_equal (Lisp_Object args, struct capture_range captures, return !NILP (Fstring_equal (text1, text2)); } -/* Handles predicate (#match "regexp" @node). Return true if "regexp" +/* Handles predicate (#match? "regexp" @node). Return true if "regexp" matches the text spanned by @node; return false otherwise. Matching is case-sensitive. If everything goes fine, don't touch SIGNAL_DATA; if error occurs, set it to a suitable signal data. */ @@ -3680,26 +3682,25 @@ treesit_predicate_match (Lisp_Object args, struct capture_range captures, { if (list_length (args) != 2) { - *signal_data = list2 (build_string ("Predicate `match' requires two " + *signal_data = list2 (build_string ("Predicate `match?' requires two " "arguments but got"), Flength (args)); return false; } - Lisp_Object regexp = XCAR (args); - Lisp_Object capture_name = XCAR (XCDR (args)); + Lisp_Object arg1 = XCAR (args); + Lisp_Object arg2 = XCAR (XCDR (args)); + Lisp_Object regexp = SYMBOLP (arg2) ? arg1 : arg2; + Lisp_Object capture_name = SYMBOLP (arg2) ? 
arg2 : arg1; + + if (!STRINGP (regexp) || !SYMBOLP (capture_name)) + { + *signal_data = list2 (build_string ("Predicate `match?' takes a regexp " + "and a node capture (order doesn't " + "matter), but got"), + Flength (args)); + return false; + } - /* It's probably common to get the argument order backwards. Catch - this mistake early and show helpful explanation, because Emacs - loves you. (We put the regexp first because that's what - string-match does.) */ - if (!STRINGP (regexp)) - xsignal1 (Qtreesit_query_error, - build_string ("The first argument to `match' should " - "be a regexp string, not a capture name")); - if (!SYMBOLP (capture_name)) - xsignal1 (Qtreesit_query_error, - build_string ("The second argument to `match' should " - "be a capture name, not a string")); Lisp_Object node = Qnil; if (!treesit_predicate_capture_name_to_node (capture_name, captures, &node, @@ -3783,11 +3784,11 @@ treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates, Lisp_Object predicate = XCAR (tail); Lisp_Object fn = XCAR (predicate); Lisp_Object args = XCDR (predicate); - if (!NILP (Fstring_equal (fn, Vtreesit_str_equal))) + if (!NILP (Fstring_equal (fn, Vtreesit_str_eq_question_mark))) pass &= treesit_predicate_equal (args, captures, signal_data); - else if (!NILP (Fstring_equal (fn, Vtreesit_str_match))) + else if (!NILP (Fstring_equal (fn, Vtreesit_str_match_question_mark))) pass &= treesit_predicate_match (args, captures, signal_data); - else if (!NILP (Fstring_equal (fn, Vtreesit_str_pred))) + else if (!NILP (Fstring_equal (fn, Vtreesit_str_pred_question_mark))) pass &= treesit_predicate_pred (args, captures, signal_data); else { @@ -5175,8 +5176,11 @@ syms_of_treesit (void) DEFSYM (QCstar, ":*"); DEFSYM (QCplus, ":+"); DEFSYM (QCequal, ":equal"); + DEFSYM (QCeq_q, ":eq?"); DEFSYM (QCmatch, ":match"); + DEFSYM (QCmatch_q, ":match?"); DEFSYM (QCpred, ":pred"); + DEFSYM (QCpred_q, ":pred?"); DEFSYM (QCline, ":line"); DEFSYM (QCcol, ":col"); DEFSYM 
(QCpos, ":pos"); @@ -5357,12 +5361,12 @@ depending on customization of `treesit-enabled-modes'. */); Vtreesit_str_star = build_string ("*"); staticpro (&Vtreesit_str_plus); Vtreesit_str_plus = build_string ("+"); - staticpro (&Vtreesit_str_pound_equal); - Vtreesit_str_pound_equal = build_string ("#equal"); - staticpro (&Vtreesit_str_pound_match); - Vtreesit_str_pound_match = build_string ("#match"); - staticpro (&Vtreesit_str_pound_pred); - Vtreesit_str_pound_pred = build_string ("#pred"); + staticpro (&Vtreesit_str_pound_eq_question_mark); + Vtreesit_str_pound_eq_question_mark = build_string ("#eq?"); + staticpro (&Vtreesit_str_pound_match_question_mark); + Vtreesit_str_pound_match_question_mark = build_string ("#match?"); + staticpro (&Vtreesit_str_pound_pred_question_mark); + Vtreesit_str_pound_pred_question_mark = build_string ("#pred?"); staticpro (&Vtreesit_str_open_bracket); Vtreesit_str_open_bracket = build_string ("["); staticpro (&Vtreesit_str_close_bracket); @@ -5373,12 +5377,12 @@ depending on customization of `treesit-enabled-modes'. 
*/); Vtreesit_str_close_paren = build_string (")"); staticpro (&Vtreesit_str_space); Vtreesit_str_space = build_string (" "); - staticpro (&Vtreesit_str_equal); - Vtreesit_str_equal = build_string ("equal"); - staticpro (&Vtreesit_str_match); - Vtreesit_str_match = build_string ("match"); - staticpro (&Vtreesit_str_pred); - Vtreesit_str_pred = build_string ("pred"); + staticpro (&Vtreesit_str_eq_question_mark); + Vtreesit_str_eq_question_mark = build_string ("eq?"); + staticpro (&Vtreesit_str_match_question_mark); + Vtreesit_str_match_question_mark = build_string ("match?"); + staticpro (&Vtreesit_str_pred_question_mark); + Vtreesit_str_pred_question_mark = build_string ("pred?"); staticpro (&Vtreesit_str_empty); Vtreesit_str_empty = build_string (""); diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index b5ea63a53f3..89303114735 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -547,10 +547,10 @@ BODY is the test body." ;; String query. '("(string) @string (pair key: (_) @keyword) -((_) @bob (#match \"\\\\`B.b\\\\'\" @bob)) +((_) @bob (#match? \"\\\\`B.b\\\\'\" @bob)) (number) @number -((number) @n3 (#equal \"3\" @n3)) -((number) @n3p (#pred treesit--ert-pred-last-sibling @n3p))" +((number) @n3 (#eq? \"3\" @n3)) +((number) @n3p (#pred? treesit--ert-pred-last-sibling @n3p))" ;; Sexp query. ((string) @string (pair key: (_) @keyword)