mirror of
git://git.sv.gnu.org/emacs.git
synced 2026-01-30 04:10:54 -08:00
Allow characters and single-char strings in rx charsets
The `not' and `intersection' forms, and `or' inside these forms, now accept characters and single-character strings as arguments. Previously, they had to be wrapped in `any' forms. This does not add expressive power but is a convenience and is easily understood. * doc/lispref/searching.texi (Rx Constructs): Amend the documentation. * etc/NEWS: Announce the change. * lisp/emacs-lisp/rx.el (rx--charset-p, rx--translate-not) (rx--charset-intervals, rx): Accept characters and 1-char strings in more places. * test/lisp/emacs-lisp/rx-tests.el (rx-not, rx-charset-or) (rx-def-in-charset-or, rx-intersection): Test the change.
This commit is contained in:
parent
b04086adf6
commit
82b4e48c59
4 changed files with 43 additions and 17 deletions
|
|
@ -1214,8 +1214,9 @@ Corresponding string regexp: @samp{[@dots{}]}
|
|||
@item @code{(not @var{charspec})}
|
||||
@cindex @code{not} in rx
|
||||
Match a character not included in @var{charspec}. @var{charspec} can
|
||||
be an @code{any}, @code{not}, @code{or}, @code{intersection},
|
||||
@code{syntax} or @code{category} form, or a character class.
|
||||
be a character, a single-character string, an @code{any}, @code{not},
|
||||
@code{or}, @code{intersection}, @code{syntax} or @code{category} form,
|
||||
or a character class.
|
||||
If @var{charspec} is an @code{or} form, its arguments have the same
|
||||
restrictions as those of @code{intersection}; see below.@*
|
||||
Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}},
|
||||
|
|
@ -1224,9 +1225,9 @@ Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}},
|
|||
@item @code{(intersection @var{charset}@dots{})}
|
||||
@cindex @code{intersection} in rx
|
||||
Match a character included in all of the @var{charset}s.
|
||||
Each @var{charset} can be an @code{any} form without character
|
||||
classes, or an @code{intersection}, @code{or} or @code{not} form whose
|
||||
arguments are also @var{charset}s.
|
||||
Each @var{charset} can be a character, a single-character string, an
|
||||
@code{any} form without character classes, or an @code{intersection},
|
||||
@code{or} or @code{not} form whose arguments are also @var{charset}s.
|
||||
|
||||
@item @code{not-newline}, @code{nonl}
|
||||
@cindex @code{not-newline} in rx
|
||||
|
|
|
|||
3
etc/NEWS
3
etc/NEWS
|
|
@ -2124,6 +2124,9 @@ Both match any single character; 'anychar' is more descriptive.
|
|||
With 'or' and 'not', it can be used to compose character-matching
|
||||
expressions from simpler parts.
|
||||
|
||||
+++
|
||||
*** 'not' and 'intersection' arguments (and 'or' inside them) can now be characters or single-char strings.
|
||||
|
||||
** Frames
|
||||
|
||||
+++
|
||||
|
|
|
|||
|
|
@ -309,6 +309,8 @@ and set operations."
|
|||
(rx--every (lambda (x) (not (symbolp x))) (cdr form)))
|
||||
(and (memq (car form) '(not or | intersection))
|
||||
(rx--every #'rx--charset-p (cdr form)))))
|
||||
(characterp form)
|
||||
(and (stringp form) (= (length form) 1))
|
||||
(and (or (symbolp form) (consp form))
|
||||
(let ((expanded (rx--expand-def form)))
|
||||
(and expanded
|
||||
|
|
@ -521,6 +523,11 @@ If NEGATED, negate the sense (thus making it positive)."
|
|||
((eq arg 'word-boundary)
|
||||
(rx--translate-symbol
|
||||
(if negated 'word-boundary 'not-word-boundary)))
|
||||
((characterp arg)
|
||||
(rx--generate-alt (not negated) (list (cons arg arg)) nil))
|
||||
((and (stringp arg) (= (length arg) 1))
|
||||
(let ((char (string-to-char arg)))
|
||||
(rx--generate-alt (not negated) (list (cons char char)) nil)))
|
||||
((let ((expanded (rx--expand-def arg)))
|
||||
(and expanded
|
||||
(rx--translate-not negated (list expanded)))))
|
||||
|
|
@ -571,8 +578,8 @@ If NEGATED, negate the sense (thus making it positive)."
|
|||
(defun rx--charset-intervals (charset)
|
||||
"Return a sorted list of non-adjacent disjoint intervals from CHARSET.
|
||||
CHARSET is any expression allowed in a character set expression:
|
||||
either `any' (no classes permitted), or `not', `or' or `intersection'
|
||||
forms whose arguments are charsets."
|
||||
characters, single-char strings, `any' forms (no classes permitted),
|
||||
or `not', `or' or `intersection' forms whose arguments are charsets."
|
||||
(pcase charset
|
||||
(`(,(or 'any 'in 'char) . ,body)
|
||||
(let ((parsed (rx--parse-any body)))
|
||||
|
|
@ -584,6 +591,11 @@ forms whose arguments are charsets."
|
|||
(`(not ,x) (rx--complement-intervals (rx--charset-intervals x)))
|
||||
(`(,(or 'or '|) . ,body) (rx--charset-union body))
|
||||
(`(intersection . ,body) (rx--charset-intersection body))
|
||||
((pred characterp)
|
||||
(list (cons charset charset)))
|
||||
((guard (and (stringp charset) (= (length charset) 1)))
|
||||
(let ((char (string-to-char charset)))
|
||||
(list (cons char char))))
|
||||
(_ (let ((expanded (rx--expand-def charset)))
|
||||
(if expanded
|
||||
(rx--charset-intervals expanded)
|
||||
|
|
@ -1161,10 +1173,12 @@ CHAR Match a literal character.
|
|||
character, a string, a range as string \"A-Z\" or cons
|
||||
(?A . ?Z), or a character class (see below). Alias: in, char.
|
||||
(not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC
|
||||
can be (any ...), (or ...), (intersection ...),
|
||||
(syntax ...), (category ...), or a character class.
|
||||
(intersection CHARSET...) Intersection of CHARSETs.
|
||||
CHARSET is (any...), (not...), (or...) or (intersection...).
|
||||
can be a character, single-char string, (any ...), (or ...),
|
||||
(intersection ...), (syntax ...), (category ...),
|
||||
or a character class.
|
||||
(intersection CHARSET...) Match all CHARSETs.
|
||||
CHARSET is (any...), (not...), (or...) or (intersection...),
|
||||
a character or a single-char string.
|
||||
not-newline Match any character except a newline. Alias: nonl.
|
||||
anychar Match any character. Alias: anything.
|
||||
unmatchable Never match anything at all.
|
||||
|
|
|
|||
|
|
@ -272,7 +272,9 @@
|
|||
(should (equal (rx (not (category tone-mark)) (not (category lao)))
|
||||
"\\C4\\Co"))
|
||||
(should (equal (rx (not (not ascii)) (not (not (not (any "a-z")))))
|
||||
"[[:ascii:]][^a-z]")))
|
||||
"[[:ascii:]][^a-z]"))
|
||||
(should (equal (rx (not ?a) (not "b") (not (not "c")) (not (not ?d)))
|
||||
"[^a][^b]cd")))
|
||||
|
||||
(ert-deftest rx-charset-or ()
|
||||
(should (equal (rx (or))
|
||||
|
|
@ -294,13 +296,17 @@
|
|||
"[a-ru-z]"))
|
||||
(should (equal (rx (or (intersection (any "c-z") (any "a-g"))
|
||||
(not (any "a-k"))))
|
||||
"[^abh-k]")))
|
||||
"[^abh-k]"))
|
||||
(should (equal (rx (or ?f (any "b-e") "a") (not (or ?x "y" (any "s-w"))))
|
||||
"[a-f][^s-y]")))
|
||||
|
||||
(ert-deftest rx-def-in-charset-or ()
|
||||
(rx-let ((a (any "badc"))
|
||||
(b (| a (any "def"))))
|
||||
(should (equal (rx (or b (any "q")))
|
||||
"[a-fq]")))
|
||||
(b (| a (any "def")))
|
||||
(c ?a)
|
||||
(d "b"))
|
||||
(should (equal (rx (or b (any "q")) (or c d))
|
||||
"[a-fq][ab]")))
|
||||
(rx-let ((diff-| (a b) (not (or (not a) b))))
|
||||
(should (equal (rx (diff-| (any "a-z") (any "gr")))
|
||||
"[a-fh-qs-z]"))))
|
||||
|
|
@ -326,7 +332,9 @@
|
|||
"[e-m]"))
|
||||
(should (equal (rx (intersection (or (any "a-f") (any "f-t"))
|
||||
(any "e-w")))
|
||||
"[e-t]")))
|
||||
"[e-t]"))
|
||||
(should (equal (rx (intersection ?m (any "a-z") "m"))
|
||||
"m")))
|
||||
|
||||
(ert-deftest rx-def-in-intersection ()
|
||||
(rx-let ((a (any "a-g"))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue