mirror of
git://git.sv.gnu.org/emacs.git
synced 2026-01-04 02:51:31 -08:00
Add new function 'dom-inner-text'
This replaces 'dom-text' and 'dom-texts', and is both more correct and
more efficient than them.

* lisp/dom.el (dom-text, dom-texts): Make obsolete in favor of...
(dom-inner-text--1, dom-inner-text): ... these new functions. Update
callers.
* doc/lispref/text.texi (Document Object Model): Update documentation to
refer to 'dom-inner-text'.
* etc/NEWS: Announce this change.
This commit is contained in:
parent
33161e51e5
commit
2e53c7d08b
5 changed files with 39 additions and 15 deletions
|
|
@ -5722,14 +5722,10 @@ Return all the non-string children of the node.
|
||||||
@item dom-attributes @var{node}
|
@item dom-attributes @var{node}
|
||||||
Return the key/value pair list of attributes of the node.
|
Return the key/value pair list of attributes of the node.
|
||||||
|
|
||||||
@item dom-text @var{node}
|
@item dom-inner-text @var{node}
|
||||||
Return all the textual elements of the node as a concatenated string.
|
|
||||||
|
|
||||||
@item dom-texts @var{node}
|
|
||||||
Return all the textual elements of the node, as well as the textual
|
Return all the textual elements of the node, as well as the textual
|
||||||
elements of all the children of the node, recursively, as a
|
elements of all the children of the node, recursively, as a
|
||||||
concatenated string. This function also takes an optional separator
|
concatenated string.
|
||||||
to be inserted between the textual elements.
|
|
||||||
|
|
||||||
@item dom-parent @var{dom} @var{node}
|
@item dom-parent @var{dom} @var{node}
|
||||||
Return the parent of @var{node} in @var{dom}.
|
Return the parent of @var{node} in @var{dom}.
|
||||||
|
|
|
||||||
6
etc/NEWS
6
etc/NEWS
|
|
@ -2635,6 +2635,12 @@ sub-directories, recursively, which were not already natively compiled.
|
||||||
This function takes two RGB lists and optional ALPHA and returns an RGB
|
This function takes two RGB lists and optional ALPHA and returns an RGB
|
||||||
list whose elements are blended in linear space proportional to ALPHA.
|
list whose elements are blended in linear space proportional to ALPHA.
|
||||||
|
|
||||||
|
+++
|
||||||
|
** New function 'dom-inner-text'.
|
||||||
|
This function gets all the text within a DOM node recursively, returning
|
||||||
|
it as a concatenated string. It replaces the now-obsolete functions
|
||||||
|
'dom-text' and 'dom-texts'.
|
||||||
|
|
||||||
+++
|
+++
|
||||||
** The 'defcustom' ':local' keyword can now be 'permanent-only'.
|
** The 'defcustom' ':local' keyword can now be 'permanent-only'.
|
||||||
This means that the variable's 'permanent-local' property is set to t,
|
This means that the variable's 'permanent-local' property is set to t,
|
||||||
|
|
|
||||||
21
lisp/dom.el
21
lisp/dom.el
|
|
@ -75,10 +75,12 @@ A typical attribute is `href'."
|
||||||
|
|
||||||
(defun dom-text (node)
|
(defun dom-text (node)
|
||||||
"Return all the text bits in the current node concatenated."
|
"Return all the text bits in the current node concatenated."
|
||||||
|
(declare (obsolete 'dom-inner-text "31.1"))
|
||||||
(mapconcat #'identity (cl-remove-if-not #'stringp (dom-children node)) " "))
|
(mapconcat #'identity (cl-remove-if-not #'stringp (dom-children node)) " "))
|
||||||
|
|
||||||
(defun dom-texts (node &optional separator)
|
(defun dom-texts (node &optional separator)
|
||||||
"Return all textual data under NODE concatenated with SEPARATOR in-between."
|
"Return all textual data under NODE concatenated with SEPARATOR in-between."
|
||||||
|
(declare (obsolete 'dom-inner-text "31.1"))
|
||||||
(if (eq (dom-tag node) 'script)
|
(if (eq (dom-tag node) 'script)
|
||||||
""
|
""
|
||||||
(mapconcat
|
(mapconcat
|
||||||
|
|
@ -93,6 +95,25 @@ A typical attribute is `href'."
|
||||||
(dom-children node)
|
(dom-children node)
|
||||||
(or separator " "))))
|
(or separator " "))))
|
||||||
|
|
||||||
|
(defun dom-inner-text--1 (node)
|
||||||
|
(dolist (child (dom-children node))
|
||||||
|
(cond
|
||||||
|
((stringp child) (insert child))
|
||||||
|
((memq (dom-tag child) '(script comment)))
|
||||||
|
(t (dom-inner-text--1 child)))))
|
||||||
|
|
||||||
|
(defun dom-inner-text (node)
|
||||||
|
"Return all textual data under NODE as a single string."
|
||||||
|
(let ((children (dom-children node)))
|
||||||
|
(if (and (length= children 1)
|
||||||
|
(stringp (car children)))
|
||||||
|
;; Copy the string content when returning to be consistent with
|
||||||
|
;; the other branch of this `if' expression.
|
||||||
|
(copy-sequence (car children))
|
||||||
|
(with-work-buffer
|
||||||
|
(dom-inner-text--1 node)
|
||||||
|
(buffer-string)))))
|
||||||
|
|
||||||
(defun dom-child-by-tag (dom tag)
|
(defun dom-child-by-tag (dom tag)
|
||||||
"Return the first child of DOM that is of type TAG."
|
"Return the first child of DOM that is of type TAG."
|
||||||
(assoc tag (dom-children dom)))
|
(assoc tag (dom-children dom)))
|
||||||
|
|
|
||||||
|
|
@ -110,7 +110,8 @@
|
||||||
|
|
||||||
(defun nnatom--dom-line (node)
|
(defun nnatom--dom-line (node)
|
||||||
"Return NODE's text as a single, whitespace-trimmed line."
|
"Return NODE's text as a single, whitespace-trimmed line."
|
||||||
(string-trim (replace-regexp-in-string "[\r\n]+" " " (dom-text node) t)))
|
(string-trim (replace-regexp-in-string
|
||||||
|
"[\r\n]+" " " (dom-inner-text node) t)))
|
||||||
|
|
||||||
(defun nnatom--read-title (group)
|
(defun nnatom--read-title (group)
|
||||||
"Return the title of GROUP, or nil."
|
"Return the title of GROUP, or nil."
|
||||||
|
|
@ -245,7 +246,7 @@ return the subject. Otherwise, return nil."
|
||||||
(dom-print (dom-child-by-tag part 'div) nil t)
|
(dom-print (dom-child-by-tag part 'div) nil t)
|
||||||
(buffer-substring-no-properties
|
(buffer-substring-no-properties
|
||||||
(point-min) (point-max)))
|
(point-min) (point-max)))
|
||||||
(dom-text part)))
|
(dom-inner-text part)))
|
||||||
(type (if (member type atypes) (concat "text/" type) type))
|
(type (if (member type atypes) (concat "text/" type) type))
|
||||||
(type (or (cdr (assoc type mtypes)) type)))
|
(type (or (cdr (assoc type mtypes)) type)))
|
||||||
(unless (string-blank-p part)
|
(unless (string-blank-p part)
|
||||||
|
|
|
||||||
|
|
@ -1033,7 +1033,7 @@ This replaces the region with the preprocessed HTML."
|
||||||
(plist-put eww-data :title
|
(plist-put eww-data :title
|
||||||
(replace-regexp-in-string
|
(replace-regexp-in-string
|
||||||
"^ \\| $" ""
|
"^ \\| $" ""
|
||||||
(replace-regexp-in-string "[ \t\r\n]+" " " (dom-text dom))))
|
(replace-regexp-in-string "[ \t\r\n]+" " " (dom-inner-text dom))))
|
||||||
(eww--after-page-change))
|
(eww--after-page-change))
|
||||||
|
|
||||||
(defun eww-display-raw (buffer &optional encode)
|
(defun eww-display-raw (buffer &optional encode)
|
||||||
|
|
@ -1204,7 +1204,7 @@ non-nil, don't actually compute a score; just call the callback."
|
||||||
(setq score 2
|
(setq score 2
|
||||||
noscore t))
|
noscore t))
|
||||||
((eq (dom-tag node) 'a)
|
((eq (dom-tag node) 'a)
|
||||||
(setq score (- (length (split-string (dom-text node))))
|
(setq score (- (length (split-string (dom-inner-text node))))
|
||||||
noscore t))
|
noscore t))
|
||||||
(t
|
(t
|
||||||
(setq score -1))))
|
(setq score -1))))
|
||||||
|
|
@ -1229,7 +1229,7 @@ If EWW can't create a readable version, return nil instead."
|
||||||
(when (and score (> score best-score)
|
(when (and score (> score best-score)
|
||||||
;; We set a lower bound to how long we accept that
|
;; We set a lower bound to how long we accept that
|
||||||
;; the readable portion of the page is going to be.
|
;; the readable portion of the page is going to be.
|
||||||
(> (length (split-string (dom-texts node))) 100))
|
(> (length (split-string (dom-inner-text node))) 100))
|
||||||
(setq best-score score
|
(setq best-score score
|
||||||
best-node node))
|
best-node node))
|
||||||
;; Keep track of any <title> and <link> tags we find to include
|
;; Keep track of any <title> and <link> tags we find to include
|
||||||
|
|
@ -1244,7 +1244,7 @@ If EWW can't create a readable version, return nil instead."
|
||||||
;; directly in our list in addition to as a child of some
|
;; directly in our list in addition to as a child of some
|
||||||
;; other node in the list. This is ok for <title> and <link>
|
;; other node in the list. This is ok for <title> and <link>
|
||||||
;; tags, but might need changed if supporting other tags.
|
;; tags, but might need changed if supporting other tags.
|
||||||
(let* ((inner-text (dom-texts node ""))
|
(let* ((inner-text (dom-inner-text node))
|
||||||
(new-node `(,(dom-tag node)
|
(new-node `(,(dom-tag node)
|
||||||
,(dom-attributes node)
|
,(dom-attributes node)
|
||||||
,@(when (length> inner-text 0)
|
,@(when (length> inner-text 0)
|
||||||
|
|
@ -1276,7 +1276,7 @@ If EWW can't create a readable version, return nil instead."
|
||||||
most-negative-fixnum))
|
most-negative-fixnum))
|
||||||
;; We set a lower bound to how long we accept that the
|
;; We set a lower bound to how long we accept that the
|
||||||
;; readable portion of the page is going to be.
|
;; readable portion of the page is going to be.
|
||||||
(when (> (length (split-string (dom-texts highest))) 100)
|
(when (> (length (split-string (dom-inner-text highest))) 100)
|
||||||
(setq result highest))))
|
(setq result highest))))
|
||||||
result))
|
result))
|
||||||
|
|
||||||
|
|
@ -1901,7 +1901,7 @@ See URL `https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Input'.")
|
||||||
'display (make-string (length value) ?*)))))))))
|
'display (make-string (length value) ?*)))))))))
|
||||||
|
|
||||||
(defun eww-tag-textarea (dom)
|
(defun eww-tag-textarea (dom)
|
||||||
(let ((value (or (dom-text dom) ""))
|
(let ((value (or (dom-inner-text dom) ""))
|
||||||
(lines (string-to-number (or (dom-attr dom 'rows) "10")))
|
(lines (string-to-number (or (dom-attr dom 'rows) "10")))
|
||||||
(width (string-to-number (or (dom-attr dom 'cols) "10")))
|
(width (string-to-number (or (dom-attr dom 'cols) "10")))
|
||||||
start end form)
|
start end form)
|
||||||
|
|
@ -1977,7 +1977,7 @@ See URL `https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Input'.")
|
||||||
(dolist (elem (dom-by-tag dom 'option))
|
(dolist (elem (dom-by-tag dom 'option))
|
||||||
(when (dom-attr elem 'selected)
|
(when (dom-attr elem 'selected)
|
||||||
(nconc menu (list :value (dom-attr elem 'value))))
|
(nconc menu (list :value (dom-attr elem 'value))))
|
||||||
(let ((display (dom-text elem)))
|
(let ((display (dom-inner-text elem)))
|
||||||
(setq max (max max (length display)))
|
(setq max (max max (length display)))
|
||||||
(push (list 'item
|
(push (list 'item
|
||||||
:value (dom-attr elem 'value)
|
:value (dom-attr elem 'value)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue