mirror of
git://git.sv.gnu.org/emacs.git
synced 2026-01-30 12:21:25 -08:00
Straighten regexp postfix operator after zero-width assertion parse
The zero-width assertions \` \' \b \B were parsed in a sloppy way so that a following postfix repetition operator could yield surprising results. For instance, "\\b*" would act as "\\b\\*", and "xy\\b*" would act as "\\(?:xy\\b\\)*". Except for \` and ^, any following postfix operator now applies to the zero-width assertion itself only, which is predictable and consistent with other assertions, if useless in practice. For historical compatibility, an operator character following \` and ^ always becomes a literal. (Bug#64128) * src/regex-emacs.c (regex_compile): Set `laststart` appropriately for each zero-width assertion instead of leaving it with whatever value it had before. Remove a redundant condition. * test/src/regex-emacs-tests.el (regexp-tests-zero-width-assertion-repetition): New test. * doc/lispref/searching.texi (Regexp Special): Say that repetition operators are not special after \`, and that they work as expected after other backslash escapes. * etc/NEWS: Announce.
This commit is contained in:
parent
dae8aab528
commit
be91192ecb
4 changed files with 88 additions and 7 deletions
|
|
@ -546,15 +546,11 @@ example, the regular expression that matches the @samp{\} character is
|
|||
|
||||
For historical compatibility, a repetition operator is treated as ordinary
|
||||
if it appears at the start of a regular expression
|
||||
or after @samp{^}, @samp{\(}, @samp{\(?:} or @samp{\|}.
|
||||
or after @samp{^}, @samp{\`}, @samp{\(}, @samp{\(?:} or @samp{\|}.
|
||||
For example, @samp{*foo} is treated as @samp{\*foo}, and
|
||||
@samp{two\|^\@{2\@}} is treated as @samp{two\|^@{2@}}.
|
||||
It is poor practice to depend on this behavior; use proper backslash
|
||||
escaping anyway, regardless of where the repetition operator appears.
|
||||
Also, a repetition operator should not immediately follow a backslash escape
|
||||
that matches only empty strings, as Emacs has bugs in this area.
|
||||
For example, it is unwise to use @samp{\b*}, which can be omitted
|
||||
without changing the documented meaning of the regular expression.
|
||||
|
||||
As a @samp{\} is not special inside a bracket expression, it can
|
||||
never remove the special meaning of @samp{-}, @samp{^} or @samp{]}.
|
||||
|
|
|
|||
8
etc/NEWS
8
etc/NEWS
|
|
@ -475,6 +475,14 @@ symbol, and either that symbol is ':eval' and the second element of
|
|||
the list evaluates to 'nil' or the symbol's value as a variable is
|
||||
'nil' or void.
|
||||
|
||||
+++
|
||||
** Regexp zero-width assertions followed by operators are better defined.
|
||||
Previously, regexps such as "xy\\B*" would have ill-defined behavior.
|
||||
Now any operator following a zero-width assertion applies to that
|
||||
assertion only (which is useless). For historical compatibility, an
|
||||
operator character following '^' or '\`' becomes literal, but we
|
||||
advise against relying on this.
|
||||
|
||||
|
||||
* Lisp Changes in Emacs 30.1
|
||||
|
||||
|
|
|
|||
|
|
@ -1716,7 +1716,8 @@ regex_compile (re_char *pattern, ptrdiff_t size,
|
|||
|
||||
/* Address of start of the most recently finished expression.
|
||||
This tells, e.g., postfix * where to find the start of its
|
||||
operand. Reset at the beginning of groups and alternatives. */
|
||||
operand. Reset at the beginning of groups and alternatives,
|
||||
and after ^ and \` for dusty-deck compatibility. */
|
||||
unsigned char *laststart = 0;
|
||||
|
||||
/* Address of beginning of regexp, or inside of last group. */
|
||||
|
|
@ -1847,12 +1848,16 @@ regex_compile (re_char *pattern, ptrdiff_t size,
|
|||
case '^':
|
||||
if (! (p == pattern + 1 || at_begline_loc_p (pattern, p)))
|
||||
goto normal_char;
|
||||
/* Special case for compatibility: postfix ops after ^ become
|
||||
literals. */
|
||||
laststart = 0;
|
||||
BUF_PUSH (begline);
|
||||
break;
|
||||
|
||||
case '$':
|
||||
if (! (p == pend || at_endline_loc_p (p, pend)))
|
||||
goto normal_char;
|
||||
laststart = b;
|
||||
BUF_PUSH (endline);
|
||||
break;
|
||||
|
||||
|
|
@ -1892,7 +1897,7 @@ regex_compile (re_char *pattern, ptrdiff_t size,
|
|||
|
||||
/* Star, etc. applied to an empty pattern is equivalent
|
||||
to an empty pattern. */
|
||||
if (!laststart || laststart == b)
|
||||
if (laststart == b)
|
||||
break;
|
||||
|
||||
/* Now we know whether or not zero matches is allowed
|
||||
|
|
@ -2544,18 +2549,24 @@ regex_compile (re_char *pattern, ptrdiff_t size,
|
|||
break;
|
||||
|
||||
case 'b':
|
||||
laststart = b;
|
||||
BUF_PUSH (wordbound);
|
||||
break;
|
||||
|
||||
case 'B':
|
||||
laststart = b;
|
||||
BUF_PUSH (notwordbound);
|
||||
break;
|
||||
|
||||
case '`':
|
||||
/* Special case for compatibility: postfix ops after \` become
|
||||
literals, as for ^ (see above). */
|
||||
laststart = 0;
|
||||
BUF_PUSH (begbuf);
|
||||
break;
|
||||
|
||||
case '\'':
|
||||
laststart = b;
|
||||
BUF_PUSH (endbuf);
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -883,4 +883,70 @@ This evaluates the TESTS test cases from glibc."
|
|||
(should (looking-at "x*\\(=\\|:\\)*"))
|
||||
(should (looking-at "x*=*?"))))
|
||||
|
||||
(ert-deftest regexp-tests-zero-width-assertion-repetition ()
|
||||
;; Check compatibility behaviour with repetition operators after
|
||||
;; certain zero-width assertions (bug#64128).
|
||||
|
||||
;; This function is just to hide ugly regexps from relint so that it
|
||||
;; doesn't complain about them.
|
||||
(cl-flet ((smatch (re str) (string-match re str)))
|
||||
;; Postfix operators after ^ and \` become literals, for historical
|
||||
;; compatibility. Only the first character of a lazy operator (like *?)
|
||||
;; becomes a literal.
|
||||
(should (equal (smatch "^*a" "x\n*a") 2))
|
||||
(should (equal (smatch "^*?a" "x\n*a") 2))
|
||||
(should (equal (smatch "^*?a" "x\na") 2))
|
||||
(should (equal (smatch "^*?a" "x\n**a") nil))
|
||||
|
||||
(should (equal (smatch "\\`*a" "*a") 0))
|
||||
(should (equal (smatch "\\`*?a" "*a") 0))
|
||||
(should (equal (smatch "\\`*?a" "a") 0))
|
||||
(should (equal (smatch "\\`*?a" "**a") nil))
|
||||
|
||||
;; Other zero-width assertions are treated as normal elements, so postfix
|
||||
;; operators apply to them alone (which is pointless but valid).
|
||||
(should (equal (smatch "\\b*!" "*!") 1))
|
||||
(should (equal (smatch "!\\b+;" "!;") nil))
|
||||
(should (equal (smatch "!\\b+a" "!a") 0))
|
||||
|
||||
(should (equal (smatch "\\B*!" "*!") 1))
|
||||
(should (equal (smatch "!\\B+;" "!;") 0))
|
||||
(should (equal (smatch "!\\B+a" "!a") nil))
|
||||
|
||||
(should (equal (smatch "\\<*b" "*b") 1))
|
||||
(should (equal (smatch "a\\<*b" "ab") 0))
|
||||
(should (equal (smatch ";\\<*b" ";b") 0))
|
||||
(should (equal (smatch "a\\<+b" "ab") nil))
|
||||
(should (equal (smatch ";\\<+b" ";b") 0))
|
||||
|
||||
(should (equal (smatch "\\>*;" "*;") 1))
|
||||
(should (equal (smatch "a\\>*b" "ab") 0))
|
||||
(should (equal (smatch "a\\>*;" "a;") 0))
|
||||
(should (equal (smatch "a\\>+b" "ab") nil))
|
||||
(should (equal (smatch "a\\>+;" "a;") 0))
|
||||
|
||||
(should (equal (smatch "a\\'" "ab") nil))
|
||||
(should (equal (smatch "b\\'" "ab") 1))
|
||||
(should (equal (smatch "a\\'*b" "ab") 0))
|
||||
(should (equal (smatch "a\\'+" "ab") nil))
|
||||
(should (equal (smatch "b\\'+" "ab") 1))
|
||||
(should (equal (smatch "\\'+" "+") 1))
|
||||
|
||||
(should (equal (smatch "\\_<*b" "*b") 1))
|
||||
(should (equal (smatch "a\\_<*b" "ab") 0))
|
||||
(should (equal (smatch " \\_<*b" " b") 0))
|
||||
(should (equal (smatch "a\\_<+b" "ab") nil))
|
||||
(should (equal (smatch " \\_<+b" " b") 0))
|
||||
|
||||
(should (equal (smatch "\\_>*;" "*;") 1))
|
||||
(should (equal (smatch "a\\_>*b" "ab") 0))
|
||||
(should (equal (smatch "a\\_>* " "a ") 0))
|
||||
(should (equal (smatch "a\\_>+b" "ab") nil))
|
||||
(should (equal (smatch "a\\_>+ " "a ") 0))
|
||||
|
||||
(should (equal (smatch "\\=*b" "*b") 1))
|
||||
(should (equal (smatch "a\\=*b" "a*b") nil))
|
||||
(should (equal (smatch "a\\=*b" "ab") 0))
|
||||
))
|
||||
|
||||
;;; regex-emacs-tests.el ends here
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue