1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2026-01-30 04:10:54 -08:00

Avoid resizing mutation in subst-char-in-string, take two

This time we take care to preserve properties, and add a test.

* lisp/subr.el (subst-char-in-string):
Use string-replace to avoid resizing mutation and O(n^2) time.
* test/lisp/subr-tests.el (subr--subst-char-in-string): New test.
This commit is contained in:
Mattias Engdegård 2024-05-13 10:44:05 +02:00
parent 334fb0ddfe
commit 49e243c0c8
2 changed files with 67 additions and 7 deletions

View file

@ -5690,13 +5690,25 @@ The SEPARATOR regexp defaults to \"\\s-+\"."
(defun subst-char-in-string (fromchar tochar string &optional inplace)
"Replace FROMCHAR with TOCHAR in STRING each time it occurs.
Unless optional argument INPLACE is non-nil, return a new string."
(let ((i (length string))
(newstr (if inplace string (copy-sequence string))))
(while (> i 0)
(setq i (1- i))
(if (eq (aref newstr i) fromchar)
(aset newstr i tochar)))
newstr))
(if (and (not inplace)
(if (multibyte-string-p string)
(> (max fromchar tochar) 127)
(> tochar 255)))
;; Avoid quadratic behaviour from resizing replacement.
(let ((res (string-replace (string fromchar) (string tochar) string)))
(unless (eq res string)
;; Mend properties broken by the replacement.
;; Not fast, but this case never was.
(dolist (p (object-intervals string))
(set-text-properties (nth 0 p) (nth 1 p) (nth 2 p) res)))
res)
(let ((i (length string))
(newstr (if inplace string (copy-sequence string))))
(while (> i 0)
(setq i (1- i))
(if (eq (aref newstr i) fromchar)
(aset newstr i tochar)))
newstr)))
(defun string-replace (from-string to-string in-string)
"Replace FROM-STRING with TO-STRING in IN-STRING each time it occurs."

View file

@ -1330,5 +1330,53 @@ final or penultimate step during initialization."))
(t x) (:success (1+ x)))
'(error "")))))
(ert-deftest subr--subst-char-in-string ()
;; Cross-validate `subst-char-in-string' with `string-replace',
;; which should produce the same results when there are no properties.
(dolist (str '("ananas" "na\x80ma\x80s" "hétérogénéité"
"Ω, Ω, Ω" "é-\x80-\x80"))
(dolist (mb '(nil t))
(unless (and (not mb) (multibyte-string-p str))
(let ((str (if (and mb (not (multibyte-string-p str)))
(string-to-multibyte str)
str)))
(dolist (inplace '(nil t))
(dolist (from '(?a #x80 #x3fff80))
(dolist (to '(?o ?☃ #x1313f #xff #x3fffc9))
;; Can't put a non-byte value in a non-ASCII unibyte string.
(unless (and (not mb) (> to #xff)
(not (string-match-p (rx bos (* ascii) eos) str)))
(let* ((in (copy-sequence str))
(ref (if (and (not mb) (> from #xff))
in ; nothing to replace
(string-replace
(if (and (not mb) (<= from #xff))
(unibyte-string from)
(string from))
(if (and (not mb) (<= to #xff))
(unibyte-string to)
(string to))
in)))
(out (subst-char-in-string from to in inplace)))
(should (equal out ref))
(if inplace
(should (eq out in))
(should (equal in str))))))))))))
;; Verify that properties are preserved.
(dolist (str (list "cocoa" (string-to-multibyte "cocoa") "écalé"))
(dolist (from '(?a ?o ?c ))
(dolist (to '(?i ?☃))
(let ((in (copy-sequence str)))
(put-text-property 0 5 'alpha 1 in)
(put-text-property 1 4 'beta 2 in)
(put-text-property 0 2 'gamma 3 in)
(put-text-property 1 4 'delta 4 in)
(put-text-property 2 3 'epsilon 5 in)
(let* ((props-in (copy-tree (object-intervals in)))
(out (subst-char-in-string from to in))
(props-out (object-intervals out)))
(should (equal props-out props-in))))))))
(provide 'subr-tests)
;;; subr-tests.el ends here