From 075e77ac449386c07a4b90e052d1bef7c6a38771 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Engdeg=C3=A5rd?= Date: Mon, 19 Jun 2023 10:25:15 +0200 Subject: [PATCH 1/7] * src/regex-emacs.c (regex_compile): Remove useless condition This condition didn't have any visible effect (as ^ is not a repetition operator); all it did was to generate slightly worse regexp bytecode for out-of-place (literal) ^ in the midst of literal characters (bug#64128). --- src/regex-emacs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/regex-emacs.c b/src/regex-emacs.c index e3237cd425a..fea34df991b 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c @@ -2597,7 +2597,7 @@ regex_compile (re_char *pattern, ptrdiff_t size, /* If followed by a repetition operator. */ || (p != pend - && (*p == '*' || *p == '+' || *p == '?' || *p == '^')) + && (*p == '*' || *p == '+' || *p == '?')) || (p + 1 < pend && p[0] == '\\' && p[1] == '{')) { /* Start building a new exactn. */ From c5f819aa03427c0e2b86131d44fa55874da2b03e Mon Sep 17 00:00:00 2001 From: Michael Albinus Date: Tue, 13 Dec 2022 20:06:48 +0100 Subject: [PATCH 2/7] Adapt Tramp specific tests in eglot-tests.el * test/lisp/progmodes/eglot-tests.el (tramp): Move up. (tramp-sh): Don't require. (eglot--cleanup-after-test): Delete Tramp buffers if needed. (eglot-tests-lsp-abiding-column): Rename from `eglot-lsp-abiding-column'. (tramp-histfile-override): Declare. (eglot--call-with-tramp-test): Use `ert-remote-temporary-file-directory'. Skip if needed. (eglot--tramp-test, eglot--tramp-test-2): Don't skip. 
--- test/lisp/progmodes/eglot-tests.el | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/lisp/progmodes/eglot-tests.el b/test/lisp/progmodes/eglot-tests.el index 518f8810bdf..725b877fd3c 100644 --- a/test/lisp/progmodes/eglot-tests.el +++ b/test/lisp/progmodes/eglot-tests.el @@ -1237,8 +1237,6 @@ GUESSED-MAJOR-MODES-SYM are bound to the useful return values of (defvar tramp-histfile-override) (defun eglot--call-with-tramp-test (fn) - (unless (>= emacs-major-version 27) - (ert-skip "Eglot Tramp support only on Emacs >= 27")) ;; Set up a Tramp method that’s just a shell so the remote host is ;; really just the local host. (let* ((tramp-remote-path (cons 'tramp-own-remote-path @@ -1260,6 +1258,9 @@ GUESSED-MAJOR-MODES-SYM are bound to the useful return values of (when (and noninteractive (not (file-directory-p "~/"))) (setenv "HOME" temporary-file-directory))))) (default-directory temporary-file-directory)) + ;; We must check the remote LSP server. So far, just "clangd" is used. + (unless (ignore-errors (executable-find "clangd" 'remote)) + (ert-skip "Remote clangd not found")) (funcall fn))) (ert-deftest eglot-test-tramp-test () From d84b026dbefce6604a35a83131649291a74fda67 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 19 Jun 2023 11:09:00 -0700 Subject: [PATCH 3/7] Document regular expression special cases better In particular, document that escape sequences like \b* are currently buggy. --- doc/lispref/searching.texi | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index b8d9094b28d..3970faebbf3 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi @@ -505,9 +505,10 @@ beginning of a line. When matching a string instead of a buffer, @samp{^} matches at the beginning of the string or after a newline character. 
-For historical compatibility reasons, @samp{^} can be used only at the -beginning of the regular expression, or after @samp{\(}, @samp{\(?:} -or @samp{\|}. +For historical compatibility, @samp{^} is special only at the beginning +of the regular expression, or after @samp{\(}, @samp{\(?:} or @samp{\|}. +Although @samp{^} is an ordinary character in other contexts, +it is good practice to use @samp{\^} even then. @item @samp{$} @cindex @samp{$} in regexp @@ -519,8 +520,10 @@ matches a string of one @samp{x} or more at the end of a line. When matching a string instead of a buffer, @samp{$} matches at the end of the string or before a newline character. -For historical compatibility reasons, @samp{$} can be used only at the +For historical compatibility, @samp{$} is special only at the end of the regular expression, or before @samp{\)} or @samp{\|}. +Although @samp{$} is an ordinary character in other contexts, +it is good practice to use @samp{\$} even then. @item @samp{\} @cindex @samp{\} in regexp @@ -540,12 +543,17 @@ example, the regular expression that matches the @samp{\} character is @samp{\} is @code{"\\\\"}. @end table -@strong{Please note:} For historical compatibility, special characters -are treated as ordinary ones if they are in contexts where their special -meanings make no sense. For example, @samp{*foo} treats @samp{*} as -ordinary since there is no preceding expression on which the @samp{*} -can act. It is poor practice to depend on this behavior; quote the -special character anyway, regardless of where it appears. +For historical compatibility, a repetition operator is treated as ordinary +if it appears at the start of a regular expression +or after @samp{^}, @samp{\(}, @samp{\(?:} or @samp{\|}. +For example, @samp{*foo} is treated as @samp{\*foo}, and +@samp{two\|^\@{2\@}} is treated as @samp{two\|^@{2@}}. +It is poor practice to depend on this behavior; use proper backslash +escaping anyway, regardless of where the repetition operator appears. 
+Also, a repetition operator should not immediately follow a backslash escape +that matches only empty strings, as Emacs has bugs in this area. +For example, it is unwise to use @samp{\b*}, which can be omitted +without changing the documented meaning of the regular expression. As a @samp{\} is not special inside a character alternative, it can never remove the special meaning of @samp{-}, @samp{^} or @samp{]}. From 5dfe3f21d12a107055fb447be58b94be98c2f628 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 19 Jun 2023 11:09:00 -0700 Subject: [PATCH 4/7] Document Emacs vs POSIX REs * doc/lispref/searching.texi (Longest Match): Rename from POSIX Regexps, as this section is about longest-match functions, not about POSIX regexps. (POSIX Regexps): New section. --- doc/lispref/searching.texi | 105 +++++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 4 deletions(-) diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index 3970faebbf3..608abae762c 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi @@ -18,11 +18,12 @@ portions of it. * Searching and Case:: Case-independent or case-significant searching. * Regular Expressions:: Describing classes of strings. * Regexp Search:: Searching for a match for a regexp. -* POSIX Regexps:: Searching POSIX-style for the longest match. +* Longest Match:: Searching for the longest match. * Match Data:: Finding out which part of the text matched, after a string or regexp search. * Search and Replace:: Commands that loop, searching and replacing. * Standard Regexps:: Useful regexps for finding sentences, pages,... +* POSIX Regexps:: Emacs regexps vs POSIX regexps. @end menu The @samp{skip-chars@dots{}} functions also perform a kind of searching. @@ -2201,8 +2202,8 @@ constructs, you should bind it temporarily for as small as possible a part of the code. 
@end defvar -@node POSIX Regexps -@section POSIX Regular Expression Searching +@node Longest Match +@section Longest-match searching for regular expression matches @cindex backtracking and POSIX regular expressions The usual regular expression functions do backtracking when necessary @@ -2217,7 +2218,9 @@ possibilities and found all matches, so they can report the longest match, as required by POSIX@. This is much slower, so use these functions only when you really need the longest match. - The POSIX search and match functions do not properly support the + Despite their names, the POSIX search and match functions +use Emacs regular expressions, not POSIX regular expressions. +@xref{POSIX Regexps}. Also, they do not properly support the non-greedy repetition operators (@pxref{Regexp Special, non-greedy}). This is because POSIX backtracking conflicts with the semantics of non-greedy repetition. @@ -2965,3 +2968,97 @@ values of the variables @code{sentence-end-double-space} @code{sentence-end-without-period}, and @code{sentence-end-without-space}. @end defun + +@node POSIX Regexps +@section Emacs versus POSIX Regular Expressions +@cindex POSIX regular expressions + +Regular expression syntax varies significantly among computer programs. +When writing Elisp code that generates regular expressions for use by other +programs, it is helpful to know how syntax variants differ. +To give a feel for the variation, this section discusses how +Emacs regular expressions differ from two syntax variants standardized by POSIX: +basic regular expressions (BREs) and extended regular expressions (EREs). +Plain @command{grep} uses BREs, and @samp{grep -E} uses EREs. + +Emacs regular expressions have a syntax closer to EREs than to BREs, +with some extensions. Here is a summary of how POSIX BREs and EREs +differ from Emacs regular expressions. + +@itemize @bullet +@item +In POSIX BREs @samp{+} and @samp{?} are not special. 
+The only backslash escape sequences are @samp{\(@dots{}\)}, +@samp{\@{@dots{}\@}}, @samp{\1} through @samp{\9}, along with the +escaped special characters @samp{\$}, @samp{\*}, @samp{\.}, @samp{\[}, +@samp{\\}, and @samp{\^}. +Therefore @samp{\(?:} acts like @samp{\([?]:}. +POSIX does not define how other BRE escapes behave; +for example, GNU @command{grep} treats @samp{\|} like Emacs does, +but does not support all the Emacs escapes. + +@item +In POSIX EREs @samp{@{}, @samp{(} and @samp{|} are special, +and @samp{)} is special when matched with a preceding @samp{(}. +These special characters do not use preceding backslashes; +@samp{(?} produces undefined results. +The only backslash escape sequences are the escaped special characters +@samp{\$}, @samp{\(}, @samp{\)}, @samp{\*}, @samp{\+}, @samp{\.}, +@samp{\?}, @samp{\[}, @samp{\\}, @samp{\^}, @samp{\@{} and @samp{\|}. +POSIX does not define how other ERE escapes behave; +for example, GNU @samp{grep -E} treats @samp{\1} like Emacs does, +but does not support all the Emacs escapes. + +@item +In POSIX BREs, it is an implementation option whether @samp{^} is special +after @samp{\(}; GNU @command{grep} treats it like Emacs does. +In POSIX EREs, @samp{^} is always special outside of character alternatives, +which means the ERE @samp{x^} never matches. +In Emacs regular expressions, @samp{^} is special only at the +beginning of the regular expression, or after @samp{\(}, @samp{\(?:} +or @samp{\|}. + +@item +In POSIX BREs, it is an implementation option whether @samp{$} is special +before @samp{\)}; GNU @command{grep} treats it like Emacs does. +In POSIX EREs, @samp{$} is always special outside of character alternatives, +which means the ERE @samp{$x} never matches. +In Emacs regular expressions, @samp{$} is special only at the +end of the regular expression, or before @samp{\)} or @samp{\|}. 
+ +@item +In POSIX BREs and EREs, undefined results are produced by repetition +operators at the start of a regular expression or subexpression +(possibly preceded by @samp{^}), except that the repetition operator +@samp{*} has the same behavior in BREs as in Emacs. +In Emacs, these operators are treated as ordinary. + +@item +In BREs and EREs, undefined results are produced by two repetition +operators in sequence. In Emacs, these have well-defined behavior, +e.g., @samp{a**} is equivalent to @samp{a*}. + +@item +In BREs and EREs, undefined results are produced by empty regular +expressions or subexpressions. In Emacs these have well-defined +behavior, e.g., @samp{\(\)*} matches the empty string. + +@item +In BREs and EREs, undefined results are produced for the named +character classes @samp{[:ascii:]}, @samp{[:multibyte:]}, +@samp{[:nonascii:]}, @samp{[:unibyte:]}, and @samp{[:word:]}. + +@item +BRE and ERE alternatives can contain collating symbols and equivalence +class expressions, e.g., @samp{[[.ch.]d[=a=]]}. +Emacs regular expressions do not support this. + +@item +BREs, EREs, and the strings they match cannot contain encoding errors +or NUL bytes. In Emacs these constructs simply match themselves. + +@item +BRE and ERE searching always finds the longest match. +Emacs searching by default does not necessarily do so. +@xref{Longest Match}. +@end itemize From 94d8eeeff4ae99cb12718dab7cf7fdc52de77b6e Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 19 Jun 2023 11:09:00 -0700 Subject: [PATCH 5/7] =?UTF-8?q?Call=20them=20=E2=80=9Cbracket=20expression?= =?UTF-8?q?s=E2=80=9D=20more=20consistently?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emacs comments and doc were inconsistent about the name used for regexps like [a-z]. Sometimes it called them “character alternatives”, sometimes “character sets”, sometimes “bracket expressions”. 
Prefer “bracket expressions” as it is less confusing: POSIX and most other programs’ doc uses “bracket expressions”, “alternative” is also used in the Emacs documentation to talk about ...\|... in regexps, and “character set” normally has a different meaning in Emacs. --- doc/emacs/search.texi | 12 +++--- doc/lispref/searching.texi | 74 ++++++++++++++++++------------------ lisp/emacs-lisp/lisp-mode.el | 2 +- lisp/textmodes/picture.el | 2 +- 4 files changed, 45 insertions(+), 45 deletions(-) diff --git a/doc/emacs/search.texi b/doc/emacs/search.texi index 45378d95f65..2a816221235 100644 --- a/doc/emacs/search.texi +++ b/doc/emacs/search.texi @@ -950,8 +950,8 @@ features used mainly in Lisp programs. @dfn{special constructs} and the rest are @dfn{ordinary}. An ordinary character matches that same character and nothing else. The special characters are @samp{$^.*+?[\}. The character @samp{]} is special if -it ends a character alternative (see below). The character @samp{-} -is special inside a character alternative. Any other character +it ends a bracket expression (see below). The character @samp{-} +is special inside a bracket expression. Any other character appearing in a regular expression is ordinary, unless a @samp{\} precedes it. (When you use regular expressions in a Lisp program, each @samp{\} must be doubled, see the example near the end of this @@ -1033,11 +1033,11 @@ you search for @samp{a.*?$} against the text @samp{abbab} followed by a newline, it matches the whole string. Since it @emph{can} match starting at the first @samp{a}, it does. +@cindex bracket expression @cindex set of alternative characters, in regular expressions @cindex character set, in regular expressions @item @kbd{[ @dots{} ]} -is a @dfn{set of alternative characters}, or a @dfn{character set}, -beginning with @samp{[} and terminated by @samp{]}. +is a @dfn{bracket expression}, which matches one of a set of characters. 
In the simplest case, the characters between the two brackets are what this set can match. Thus, @samp{[ad]} matches either one @samp{a} or @@ -1057,7 +1057,7 @@ Greek letters. @cindex character classes, in regular expressions You can also include certain special @dfn{character classes} in a character set. A @samp{[:} and balancing @samp{:]} enclose a -character class inside a set of alternative characters. For instance, +character class inside a bracket expression. For instance, @samp{[[:alnum:]]} matches any letter or digit. @xref{Char Classes,,, elisp, The Emacs Lisp Reference Manual}, for a list of character classes. @@ -1125,7 +1125,7 @@ no preceding expression on which the @samp{*} can act. It is poor practice to depend on this behavior; it is better to quote the special character anyway, regardless of where it appears. -As a @samp{\} is not special inside a set of alternative characters, it can +As a @samp{\} is not special inside a bracket expression, it can never remove the special meaning of @samp{-}, @samp{^} or @samp{]}. You should not quote these characters when they have no special meaning. This would not clarify anything, since backslashes diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index 608abae762c..28230cea643 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi @@ -278,10 +278,10 @@ character is a simple regular expression that matches that character and nothing else. The special characters are @samp{.}, @samp{*}, @samp{+}, @samp{?}, @samp{[}, @samp{^}, @samp{$}, and @samp{\}; no new special characters will be defined in the future. The character -@samp{]} is special if it ends a character alternative (see later). -The character @samp{-} is special inside a character alternative. A +@samp{]} is special if it ends a bracket expression (see later). +The character @samp{-} is special inside a bracket expression. A @samp{[:} and balancing @samp{:]} enclose a character class inside a -character alternative. 
Any other character appearing in a regular +bracket expression. Any other character appearing in a regular expression is ordinary, unless a @samp{\} precedes it. For example, @samp{f} is not a special character, so it is ordinary, and @@ -374,19 +374,19 @@ expression @samp{c[ad]*?a}, applied to that same string, matches just permits the whole expression to match is @samp{d}.) @item @samp{[ @dots{} ]} -@cindex character alternative (in regexp) +@cindex bracket expression (in regexp) @cindex @samp{[} in regexp @cindex @samp{]} in regexp -is a @dfn{character alternative}, which begins with @samp{[} and is +is a @dfn{bracket expression}, which begins with @samp{[} and is terminated by @samp{]}. In the simplest case, the characters between -the two brackets are what this character alternative can match. +the two brackets are what this bracket expression can match. Thus, @samp{[ad]} matches either one @samp{a} or one @samp{d}, and @samp{[ad]*} matches any string composed of just @samp{a}s and @samp{d}s (including the empty string). It follows that @samp{c[ad]*r} matches @samp{cr}, @samp{car}, @samp{cdr}, @samp{caddaar}, etc. -You can also include character ranges in a character alternative, by +You can also include character ranges in a bracket expression, by writing the starting and ending characters with a @samp{-} between them. Thus, @samp{[a-z]} matches any lower-case @acronym{ASCII} letter. Ranges may be intermixed freely with individual characters, as in @@ -395,7 +395,7 @@ or @samp{$}, @samp{%} or period. However, the ending character of one range should not be the starting point of another one; for example, @samp{[a-m-z]} should be avoided. -A character alternative can also specify named character classes +A bracket expression can also specify named character classes (@pxref{Char Classes}). For example, @samp{[[:ascii:]]} matches any @acronym{ASCII} character. 
Using a character class is equivalent to mentioning each of the characters in that class; but the latter is not @@ -404,9 +404,9 @@ different characters. A character class should not appear as the lower or upper bound of a range. The usual regexp special characters are not special inside a -character alternative. A completely different set of characters is +bracket expression. A completely different set of characters is special: @samp{]}, @samp{-} and @samp{^}. -To include @samp{]} in a character alternative, put it at the +To include @samp{]} in a bracket expression, put it at the beginning. To include @samp{^}, put it anywhere but at the beginning. To include @samp{-}, put it at the end. Thus, @samp{[]^-]} matches all three of these special characters. You cannot use @samp{\} to @@ -444,7 +444,7 @@ characters and raw 8-bit bytes, but not non-ASCII characters. This feature is intended for searching text in unibyte buffers and strings. @end enumerate -Some kinds of character alternatives are not the best style even +Some kinds of bracket expressions are not the best style even though they have a well-defined meaning in Emacs. They include: @enumerate @@ -458,7 +458,7 @@ Unicode character escapes can help here; for example, for most programmers @samp{[ก-ฺ฿-๛]} is less clear than @samp{[\u0E01-\u0E3A\u0E3F-\u0E5B]}. @item -Although a character alternative can include duplicates, it is better +Although a bracket expression can include duplicates, it is better style to avoid them. For example, @samp{[XYa-yYb-zX]} is less clear than @samp{[XYa-z]}. @@ -469,30 +469,30 @@ is simpler to list the characters. For example, than @samp{[ij]}, and @samp{[i-k]} is less clear than @samp{[ijk]}. @item -Although a @samp{-} can appear at the beginning of a character -alternative or as the upper bound of a range, it is better style to -put @samp{-} by itself at the end of a character alternative. 
For +Although a @samp{-} can appear at the beginning of a bracket +expression or as the upper bound of a range, it is better style to +put @samp{-} by itself at the end of a bracket expression. For example, although @samp{[-a-z]} is valid, @samp{[a-z-]} is better style; and although @samp{[*--]} is valid, @samp{[*+,-]} is clearer. @end enumerate @item @samp{[^ @dots{} ]} @cindex @samp{^} in regexp -@samp{[^} begins a @dfn{complemented character alternative}. This +@samp{[^} begins a @dfn{complemented bracket expression}. This matches any character except the ones specified. Thus, @samp{[^a-z0-9A-Z]} matches all characters @emph{except} ASCII letters and digits. -@samp{^} is not special in a character alternative unless it is the first +@samp{^} is not special in a bracket expression unless it is the first character. The character following the @samp{^} is treated as if it were first (in other words, @samp{-} and @samp{]} are not special there). -A complemented character alternative can match a newline, unless newline is +A complemented bracket expression can match a newline, unless newline is mentioned as one of the characters not to match. This is in contrast to the handling of regexps in programs such as @code{grep}. -You can specify named character classes, just like in character -alternatives. For instance, @samp{[^[:ascii:]]} matches any +You can specify named character classes, just like in bracket +expressions. For instance, @samp{[^[:ascii:]]} matches any non-@acronym{ASCII} character. @xref{Char Classes}. @item @samp{^} @@ -556,7 +556,7 @@ that matches only empty strings, as Emacs has bugs in this area. For example, it is unwise to use @samp{\b*}, which can be omitted without changing the documented meaning of the regular expression. -As a @samp{\} is not special inside a character alternative, it can +As a @samp{\} is not special inside a bracket expression, it can never remove the special meaning of @samp{-}, @samp{^} or @samp{]}. 
You should not quote these characters when they have no special meaning. This would not clarify anything, since backslashes @@ -565,23 +565,23 @@ special meaning, as in @samp{[^\]} (@code{"[^\\]"} for Lisp string syntax), which matches any single character except a backslash. In practice, most @samp{]} that occur in regular expressions close a -character alternative and hence are special. However, occasionally a +bracket expression and hence are special. However, occasionally a regular expression may try to match a complex pattern of literal @samp{[} and @samp{]}. In such situations, it sometimes may be necessary to carefully parse the regexp from the start to determine -which square brackets enclose a character alternative. For example, -@samp{[^][]]} consists of the complemented character alternative +which square brackets enclose a bracket expression. For example, +@samp{[^][]]} consists of the complemented bracket expression @samp{[^][]} (which matches any single character that is not a square bracket), followed by a literal @samp{]}. The exact rules are that at the beginning of a regexp, @samp{[} is special and @samp{]} not. This lasts until the first unquoted -@samp{[}, after which we are in a character alternative; @samp{[} is +@samp{[}, after which we are in a bracket expression; @samp{[} is no longer special (except when it starts a character class) but @samp{]} is special, unless it immediately follows the special @samp{[} or that @samp{[} followed by a @samp{^}. This lasts until the next special -@samp{]} that does not end a character class. This ends the character -alternative and restores the ordinary syntax of regular expressions; +@samp{]} that does not end a character class. This ends the bracket +expression and restores the ordinary syntax of regular expressions; an unquoted @samp{[} is special again and a @samp{]} not. @node Char Classes @@ -592,8 +592,8 @@ an unquoted @samp{[} is special again and a @samp{]} not. 
@cindex alpha character class, regexp @cindex xdigit character class, regexp - Below is a table of the classes you can use in a character -alternative, and what they mean. Note that the @samp{[} and @samp{]} + Below is a table of the classes you can use in a bracket +expression, and what they mean. Note that the @samp{[} and @samp{]} characters that enclose the class name are part of the name, so a regular expression using these classes needs one more pair of brackets. For example, a regular expression matching a sequence of @@ -920,7 +920,7 @@ with a symbol-constituent character. @kindex invalid-regexp Not every string is a valid regular expression. For example, a string -that ends inside a character alternative without a terminating @samp{]} +that ends inside a bracket expression without a terminating @samp{]} is invalid, and so is a string that ends with a single @samp{\}. If an invalid regular expression is passed to any of the search functions, an @code{invalid-regexp} error is signaled. @@ -957,7 +957,7 @@ deciphered as follows: @table @code @item [.?!] -The first part of the pattern is a character alternative that matches +The first part of the pattern is a bracket expression that matches any one of three characters: period, question mark, and exclamation mark. The match must begin with one of these three characters. (This is one point where the new default regexp used by Emacs differs from @@ -969,7 +969,7 @@ The second part of the pattern matches any closing braces and quotation marks, zero or more of them, that may follow the period, question mark or exclamation mark. The @code{\"} is Lisp syntax for a double-quote in a string. The @samp{*} at the end indicates that the immediately -preceding regular expression (a character alternative, in this case) may be +preceding regular expression (a bracket expression, in this case) may be repeated zero or more times. @item \\($\\|@ $\\|\t\\|@ @ \\) @@ -1920,7 +1920,7 @@ attempts. 
Other zero-width assertions may also bring benefits by causing a match to fail early. @item -Avoid or-patterns in favor of character alternatives: write +Avoid or-patterns in favor of bracket expressions: write @samp{[ab]} instead of @samp{a\|b}. Recall that @samp{\s-} and @samp{\sw} are equivalent to @samp{[[:space:]]} and @samp{[[:word:]]}, respectively. @@ -3012,7 +3012,7 @@ but does not support all the Emacs escapes. @item In POSIX BREs, it is an implementation option whether @samp{^} is special after @samp{\(}; GNU @command{grep} treats it like Emacs does. -In POSIX EREs, @samp{^} is always special outside of character alternatives, +In POSIX EREs, @samp{^} is always special outside of bracket expressions, which means the ERE @samp{x^} never matches. In Emacs regular expressions, @samp{^} is special only at the beginning of the regular expression, or after @samp{\(}, @samp{\(?:} @@ -3021,7 +3021,7 @@ or @samp{\|}. @item In POSIX BREs, it is an implementation option whether @samp{$} is special before @samp{\)}; GNU @command{grep} treats it like Emacs does. -In POSIX EREs, @samp{$} is always special outside of character alternatives, +In POSIX EREs, @samp{$} is always special outside of bracket expressions, which means the ERE @samp{$x} never matches. In Emacs regular expressions, @samp{$} is special only at the end of the regular expression, or before @samp{\)} or @samp{\|}. @@ -3049,8 +3049,8 @@ character classes @samp{[:ascii:]}, @samp{[:multibyte:]}, @samp{[:nonascii:]}, @samp{[:unibyte:]}, and @samp{[:word:]}. @item -BRE and ERE alternatives can contain collating symbols and equivalence -class expressions, e.g., @samp{[[.ch.]d[=a=]]}. +BREs and EREs can contain collating symbols and equivalence +class expressions within bracket expressions, e.g., @samp{[[.ch.]d[=a=]]}. Emacs regular expressions do not support this. 
@item diff --git a/lisp/emacs-lisp/lisp-mode.el b/lisp/emacs-lisp/lisp-mode.el index 9914ededb85..1990630608d 100644 --- a/lisp/emacs-lisp/lisp-mode.el +++ b/lisp/emacs-lisp/lisp-mode.el @@ -1453,7 +1453,7 @@ and initial semicolons." ;; are buffer-local, but we avoid changing them so that they can be set ;; to make `forward-paragraph' and friends do something the user wants. ;; - ;; `paragraph-start': The `(' in the character alternative and the + ;; `paragraph-start': The `(' in the bracket expression and the ;; left-singlequote plus `(' sequence after the \\| alternative prevent ;; sexps and backquoted sexps that follow a docstring from being filled ;; with the docstring. This setting has the consequence of inhibiting diff --git a/lisp/textmodes/picture.el b/lisp/textmodes/picture.el index 9aa9b72c513..f98c3963b6f 100644 --- a/lisp/textmodes/picture.el +++ b/lisp/textmodes/picture.el @@ -383,7 +383,7 @@ Interactively, ARG is the numeric argument, and defaults to 1." The syntax for this variable is like the syntax used inside of `[...]' in a regular expression--but without the `[' and the `]'. It is NOT a regular expression, and should follow the usual -rules for the contents of a character alternative. +rules for the contents of a bracket expression. It defines a set of \"interesting characters\" to look for when setting \(or searching for) tab stops, initially \"!-~\" (all printing characters). For example, suppose that you are editing a table which is formatted thus: From ef2a9b9779f87438958830747e97b0a37804a3d6 Mon Sep 17 00:00:00 2001 From: Jim Porter Date: Mon, 19 Jun 2023 13:45:37 -0700 Subject: [PATCH 6/7] ; Improve 'rx' form from edb0862f5e69 Thanks to Michael Albinus for the suggestion. * test/lisp/eshell/esh-util-tests.el (esh-util-test/eshell-stringify/list): Use 'any' instead of 'or' in 'rx' form. 
--- test/lisp/eshell/esh-util-tests.el | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lisp/eshell/esh-util-tests.el b/test/lisp/eshell/esh-util-tests.el index 52b42fe915c..8585677e14e 100644 --- a/test/lisp/eshell/esh-util-tests.el +++ b/test/lisp/eshell/esh-util-tests.el @@ -52,7 +52,7 @@ ;; no leading/trailing whitespace. (should (equal (eshell-stringify '(1 2 3)) "(1 2 3)")) (should (equal (replace-regexp-in-string - (rx (+ (or space "\n"))) " " + (rx (+ (any space "\n"))) " " (eshell-stringify '((1 2) (3 . 4)))) "((1 2) (3 . 4))"))) From 1e13610b75718e7904f8af181fb73571639e1211 Mon Sep 17 00:00:00 2001 From: Andrew G Cohen Date: Tue, 20 Jun 2023 07:59:51 +0800 Subject: [PATCH 7/7] Don't read all headers in gnus thread referral (bug#63842) Gnus thread referral tries to add newly found articles to an existing summary buffer. When the list of such articles is known from searching we can speed things up by only entering these specific headers into the dependencies table. * lisp/gnus/gnus-sum.el (gnus-summary-refer-thread): Don't set `gnus-read-all-available-headers' when finding articles by searching. --- lisp/gnus/gnus-sum.el | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lisp/gnus/gnus-sum.el b/lisp/gnus/gnus-sum.el index 4effaa981ec..a3be5577f7a 100644 --- a/lisp/gnus/gnus-sum.el +++ b/lisp/gnus/gnus-sum.el @@ -9029,7 +9029,6 @@ is non-numeric or nil fetch the number specified by the (id (mail-header-id header)) (gnus-inhibit-demon t) (gnus-summary-ignore-duplicates t) - (gnus-read-all-available-headers t) (gnus-refer-thread-use-search (if (or (null limit) (numberp limit)) gnus-refer-thread-use-search @@ -9049,7 +9048,8 @@ is non-numeric or nil fetch the number specified by the (gnus-search-thread header)) ;; Otherwise just retrieve some headers. (t - (let* ((limit (if (numberp limit) + (let* ((gnus-read-all-available-headers t) + (limit (if (numberp limit) limit gnus-refer-thread-limit)) (last (if (numberp limit)