1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2026-01-04 02:51:31 -08:00

Add new function 'dom-inner-text'

This replaces 'dom-text' and 'dom-texts', and is both more correct and
more efficient than them.

* lisp/dom.el (dom-text, dom-texts): Make obsolete in favor of...
(dom-inner-text--1, dom-inner-text): ... these new functions.  Update
callers.

* doc/lispref/text.texi (Document Object Model): Update documentation to
refer to 'dom-inner-text'.

* etc/NEWS: Announce this change.
This commit is contained in:
Jim Porter 2025-07-20 13:48:22 -07:00
parent 33161e51e5
commit 2e53c7d08b
5 changed files with 39 additions and 15 deletions

View file

@ -5722,14 +5722,10 @@ Return all the non-string children of the node.
@item dom-attributes @var{node} @item dom-attributes @var{node}
Return the key/value pair list of attributes of the node. Return the key/value pair list of attributes of the node.
@item dom-text @var{node} @item dom-inner-text @var{node}
Return all the textual elements of the node as a concatenated string.
@item dom-texts @var{node}
Return all the textual elements of the node, as well as the textual Return all the textual elements of the node, as well as the textual
elements of all the children of the node, recursively, as a elements of all the children of the node, recursively, as a
concatenated string. This function also takes an optional separator concatenated string.
to be inserted between the textual elements.
@item dom-parent @var{dom} @var{node} @item dom-parent @var{dom} @var{node}
Return the parent of @var{node} in @var{dom}. Return the parent of @var{node} in @var{dom}.

View file

@ -2635,6 +2635,12 @@ sub-directories, recursively, which were not already natively compiled.
This function takes two RGB lists and optional ALPHA and returns an RGB This function takes two RGB lists and optional ALPHA and returns an RGB
list whose elements are blended in linear space proportional to ALPHA. list whose elements are blended in linear space proportional to ALPHA.
+++
** New function 'dom-inner-text'.
This function gets all the text within a DOM node recursively, returning
it as a concatenated string. It replaces the now-obsolete functions
'dom-text' and 'dom-texts'.
+++ +++
** The 'defcustom' ':local' keyword can now be 'permanent-only'. ** The 'defcustom' ':local' keyword can now be 'permanent-only'.
This means that the variable's 'permanent-local' property is set to t, This means that the variable's 'permanent-local' property is set to t,

View file

@ -75,10 +75,12 @@ A typical attribute is `href'."
(defun dom-text (node)
  "Return the string children of NODE concatenated, separated by spaces.
Only the strings found among (dom-children NODE) are used; children
that are not strings are ignored, so text nested inside child elements
is not included."
  ;; The replacement name must not be quoted here: `declare' quotes it
  ;; itself, so a quoted name would be recorded as the list
  ;; (quote dom-inner-text) and shown that way in obsolescence warnings.
  (declare (obsolete dom-inner-text "31.1"))
  (mapconcat #'identity (cl-remove-if-not #'stringp (dom-children node)) " "))
(defun dom-texts (node &optional separator) (defun dom-texts (node &optional separator)
"Return all textual data under NODE concatenated with SEPARATOR in-between." "Return all textual data under NODE concatenated with SEPARATOR in-between."
(declare (obsolete 'dom-inner-text "31.1"))
(if (eq (dom-tag node) 'script) (if (eq (dom-tag node) 'script)
"" ""
(mapconcat (mapconcat
@ -93,6 +95,25 @@ A typical attribute is `href'."
(dom-children node) (dom-children node)
(or separator " ")))) (or separator " "))))
(defun dom-inner-text--1 (node)
  "Insert the text found under NODE into the current buffer.
Visit the children of NODE in order.  A string child is inserted at
point; a child whose tag is `script' or `comment' is skipped; any
other child is processed recursively."
  (dolist (kid (dom-children node))
    (if (stringp kid)
        (insert kid)
      ;; Neither scripts nor comments contribute text.
      (unless (memq (dom-tag kid) '(script comment))
        (dom-inner-text--1 kid)))))
(defun dom-inner-text (node)
  "Return the text under NODE, gathered recursively, as one string.
The returned string is always a fresh one, never one of the strings
stored in NODE itself."
  (let ((kids (dom-children node)))
    (cond
     ;; Shortcut: a lone string child needs no buffer.  It is copied so
     ;; that this branch hands back a new string, just as the general
     ;; branch below does.
     ((and (length= kids 1)
           (stringp (car kids)))
      (copy-sequence (car kids)))
     ;; General case: collect every piece of text in a work buffer and
     ;; return the buffer's contents.
     (t
      (with-work-buffer
        (dom-inner-text--1 node)
        (buffer-string))))))
(defun dom-child-by-tag (dom tag) (defun dom-child-by-tag (dom tag)
"Return the first child of DOM that is of type TAG." "Return the first child of DOM that is of type TAG."
(assoc tag (dom-children dom))) (assoc tag (dom-children dom)))

View file

@ -110,7 +110,8 @@
(defun nnatom--dom-line (node) (defun nnatom--dom-line (node)
"Return NODE's text as a single, whitespace-trimmed line." "Return NODE's text as a single, whitespace-trimmed line."
(string-trim (replace-regexp-in-string "[\r\n]+" " " (dom-text node) t))) (string-trim (replace-regexp-in-string
"[\r\n]+" " " (dom-inner-text node) t)))
(defun nnatom--read-title (group) (defun nnatom--read-title (group)
"Return the title of GROUP, or nil." "Return the title of GROUP, or nil."
@ -245,7 +246,7 @@ return the subject. Otherwise, return nil."
(dom-print (dom-child-by-tag part 'div) nil t) (dom-print (dom-child-by-tag part 'div) nil t)
(buffer-substring-no-properties (buffer-substring-no-properties
(point-min) (point-max))) (point-min) (point-max)))
(dom-text part))) (dom-inner-text part)))
(type (if (member type atypes) (concat "text/" type) type)) (type (if (member type atypes) (concat "text/" type) type))
(type (or (cdr (assoc type mtypes)) type))) (type (or (cdr (assoc type mtypes)) type)))
(unless (string-blank-p part) (unless (string-blank-p part)

View file

@ -1033,7 +1033,7 @@ This replaces the region with the preprocessed HTML."
(plist-put eww-data :title (plist-put eww-data :title
(replace-regexp-in-string (replace-regexp-in-string
"^ \\| $" "" "^ \\| $" ""
(replace-regexp-in-string "[ \t\r\n]+" " " (dom-text dom)))) (replace-regexp-in-string "[ \t\r\n]+" " " (dom-inner-text dom))))
(eww--after-page-change)) (eww--after-page-change))
(defun eww-display-raw (buffer &optional encode) (defun eww-display-raw (buffer &optional encode)
@ -1204,7 +1204,7 @@ non-nil, don't actually compute a score; just call the callback."
(setq score 2 (setq score 2
noscore t)) noscore t))
((eq (dom-tag node) 'a) ((eq (dom-tag node) 'a)
(setq score (- (length (split-string (dom-text node)))) (setq score (- (length (split-string (dom-inner-text node))))
noscore t)) noscore t))
(t (t
(setq score -1)))) (setq score -1))))
@ -1229,7 +1229,7 @@ If EWW can't create a readable version, return nil instead."
(when (and score (> score best-score) (when (and score (> score best-score)
;; We set a lower bound to how long we accept that ;; We set a lower bound to how long we accept that
;; the readable portion of the page is going to be. ;; the readable portion of the page is going to be.
(> (length (split-string (dom-texts node))) 100)) (> (length (split-string (dom-inner-text node))) 100))
(setq best-score score (setq best-score score
best-node node)) best-node node))
;; Keep track of any <title> and <link> tags we find to include ;; Keep track of any <title> and <link> tags we find to include
@ -1244,7 +1244,7 @@ If EWW can't create a readable version, return nil instead."
;; directly in our list in addition to as a child of some ;; directly in our list in addition to as a child of some
;; other node in the list. This is ok for <title> and <link> ;; other node in the list. This is ok for <title> and <link>
;; tags, but might need changed if supporting other tags. ;; tags, but might need changed if supporting other tags.
(let* ((inner-text (dom-texts node "")) (let* ((inner-text (dom-inner-text node))
(new-node `(,(dom-tag node) (new-node `(,(dom-tag node)
,(dom-attributes node) ,(dom-attributes node)
,@(when (length> inner-text 0) ,@(when (length> inner-text 0)
@ -1276,7 +1276,7 @@ If EWW can't create a readable version, return nil instead."
most-negative-fixnum)) most-negative-fixnum))
;; We set a lower bound to how long we accept that the ;; We set a lower bound to how long we accept that the
;; readable portion of the page is going to be. ;; readable portion of the page is going to be.
(when (> (length (split-string (dom-texts highest))) 100) (when (> (length (split-string (dom-inner-text highest))) 100)
(setq result highest)))) (setq result highest))))
result)) result))
@ -1901,7 +1901,7 @@ See URL `https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Input'.")
'display (make-string (length value) ?*))))))))) 'display (make-string (length value) ?*)))))))))
(defun eww-tag-textarea (dom) (defun eww-tag-textarea (dom)
(let ((value (or (dom-text dom) "")) (let ((value (or (dom-inner-text dom) ""))
(lines (string-to-number (or (dom-attr dom 'rows) "10"))) (lines (string-to-number (or (dom-attr dom 'rows) "10")))
(width (string-to-number (or (dom-attr dom 'cols) "10"))) (width (string-to-number (or (dom-attr dom 'cols) "10")))
start end form) start end form)
@ -1977,7 +1977,7 @@ See URL `https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Input'.")
(dolist (elem (dom-by-tag dom 'option)) (dolist (elem (dom-by-tag dom 'option))
(when (dom-attr elem 'selected) (when (dom-attr elem 'selected)
(nconc menu (list :value (dom-attr elem 'value)))) (nconc menu (list :value (dom-attr elem 'value))))
(let ((display (dom-text elem))) (let ((display (dom-inner-text elem)))
(setq max (max max (length display))) (setq max (max max (length display)))
(push (list 'item (push (list 'item
:value (dom-attr elem 'value) :value (dom-attr elem 'value)