;;; nxml-mode.el --- a new XML mode -*- lexical-binding:t -*-
;; Copyright (C) 2003-2004, 2007-2025 Free Software Foundation, Inc.
;; Author: James Clark
;; Keywords: text, hypermedia, languages, XML
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see .
;;; Commentary:
;; See nxml-rap.el for description of parsing strategy.
;;; Code:
(eval-when-compile (require 'cl-lib))
(require 'xmltok)
(require 'nxml-enc)
(require 'nxml-util)
(require 'nxml-rap)
(require 'nxml-outln)
;; nxml-mode calls rng-nxml-mode-init, which is autoloaded from rng-nxml.
;; So we might as well just require it and silence the compiler.
(provide 'nxml-mode) ; avoid recursive require
(require 'rng-nxml)
(require 'sgml-mode)
;;; Customization
(defgroup nxml nil
"New XML editing mode."
:link '(custom-manual "(nxml-mode) Top")
:group 'languages)
(defgroup nxml-faces nil
"Faces for XML syntax highlighting."
:group 'nxml)
(defcustom nxml-char-ref-display-glyph-flag t
"Non-nil means display glyph following character reference.
The glyph is displayed in face `nxml-glyph'."
:group 'nxml
:type 'boolean
:safe #'booleanp)
(defcustom nxml-sexp-element-flag t
"Non-nil means sexp commands treat an element as a single expression."
:version "27.1" ; nil -> t
:group 'nxml
:type 'boolean
:safe #'booleanp)
(defcustom nxml-slash-auto-complete-flag nil
"Non-nil means typing a slash automatically completes the end-tag.
This is used by `nxml-electric-slash'."
:group 'nxml
:type 'boolean
:safe #'booleanp)
(defcustom nxml-child-indent 2
"Indentation for the children of an element relative to the start-tag.
This only applies when the line or lines containing the start-tag contains
nothing else other than that start-tag."
:group 'nxml
:type 'integer
:safe #'integerp)
(defcustom nxml-attribute-indent 4
"Indentation for the attributes of an element relative to the start-tag.
This only applies when the first attribute of a tag starts a line.
In other cases, the first attribute on one line is indented the same
as the first attribute on the previous line."
:group 'nxml
:type 'integer
:safe #'integerp)
(defcustom nxml-bind-meta-tab-to-complete-flag t
"Non-nil means to use nXML completion in \\[completion-at-point]."
:group 'nxml
:type 'boolean
:safe #'booleanp)
(defcustom nxml-prefer-utf-16-to-utf-8-flag nil
"Non-nil means prefer UTF-16 to UTF-8 when saving a buffer.
This is used only when a buffer does not contain an encoding declaration
and when its current `buffer-file-coding-system' specifies neither UTF-16
nor UTF-8."
:group 'nxml
:type 'boolean
:safe #'booleanp)
(defcustom nxml-prefer-utf-16-little-to-big-endian-flag (eq system-type
'windows-nt)
"Non-nil means prefer little-endian to big-endian byte-order for UTF-16.
This is used only for saving a buffer; when reading the
byte-order is auto-detected. It may be relevant both when there
is no encoding declaration and when the encoding declaration
specifies `UTF-16'."
:group 'nxml
:type 'boolean
:safe #'booleanp)
(defcustom nxml-default-buffer-file-coding-system nil
"Default value for `buffer-file-coding-system' for a buffer for a new file.
A value of nil means use the default value of
`buffer-file-coding-system' as normal.
A buffer's `buffer-file-coding-system' affects what
\\[nxml-insert-xml-declaration] inserts."
:group 'nxml
:type 'coding-system
:safe #'coding-system-p)
(defcustom nxml-auto-insert-xml-declaration-flag nil
"Non-nil means automatically insert an XML declaration in a new file.
The XML declaration is inserted using `nxml-insert-xml-declaration'."
:group 'nxml
:type 'boolean
:safe #'booleanp)
(defface nxml-delimited-data
'((t (:inherit font-lock-doc-face)))
"Face used to highlight data enclosed between delimiters.
This is not used directly, but only via inheritance by other faces."
:group 'nxml-faces)
(defface nxml-name
'((t (:inherit font-lock-builtin-face)))
"Face used to highlight various names.
This includes element and attribute names, processing
instruction targets and the CDATA keyword in a CDATA section.
This is not used directly, but only via inheritance by other faces."
:group 'nxml-faces)
(defface nxml-ref
'((t (:inherit font-lock-constant-face)))
"Face used to highlight character and entity references.
This is not used directly, but only via inheritance by other faces."
:group 'nxml-faces)
(defface nxml-text
'((t (:inherit default)))
"Face used to highlight text."
:group 'nxml-faces)
(defface nxml-delimiter
'((t (:inherit nxml-text)))
"Face used to highlight delimiters.
This is not used directly, but only via inheritance by other faces."
:group 'nxml-faces)
(defface nxml-processing-instruction-delimiter
'((t (:inherit nxml-delimiter)))
"Face used for the delimiters of processing instructions, i.e., and ?>."
:group 'nxml-faces)
(defface nxml-processing-instruction-target
'((t (:inherit font-lock-keyword-face)))
"Face used for the target of processing instructions."
:group 'nxml-faces)
(defface nxml-processing-instruction-content
'((t (:inherit nxml-delimited-data)))
"Face used for the content of processing instructions."
:group 'nxml-faces)
(defface nxml-cdata-section-delimiter
'((t (:inherit nxml-delimiter)))
"Face used for the delimiters of CDATA sections, i.e., ."
:group 'nxml-faces)
(defface nxml-cdata-section-CDATA
'((t (:inherit nxml-name)))
"Face used for the CDATA keyword in CDATA sections."
:group 'nxml-faces)
(defface nxml-cdata-section-content
'((t (:inherit nxml-text)))
"Face used for the content of CDATA sections."
:group 'nxml-faces)
(defface nxml-char-ref-number
'((t (:inherit nxml-ref)))
"Face used for the number in character references.
This includes the `x' in hex references."
:group 'nxml-faces)
(defface nxml-char-ref-delimiter
'((t (:inherit nxml-ref)))
"Face used for the delimiters of character references, i.e., and ;."
:group 'nxml-faces)
(defface nxml-entity-ref-name
'((t (:inherit nxml-ref)))
"Face used for the entity name in general entity references."
:group 'nxml-faces)
(defface nxml-entity-ref-delimiter
'((t (:inherit nxml-ref)))
"Face used for the delimiters of entity references, i.e., & and ;."
:group 'nxml-faces)
(defface nxml-tag-delimiter
'((t (:inherit nxml-delimiter)))
"Face used for the angle brackets delimiting tags.
`nxml-tag-slash' is used for slashes."
:group 'nxml-faces)
(defface nxml-tag-slash
'((t (:inherit nxml-tag-delimiter)))
"Face used for slashes in tags, both in end-tags and empty-elements."
:group 'nxml-faces)
(defface nxml-element-prefix
'((t (:inherit nxml-name)))
"Face used for the prefix of elements."
:group 'nxml-faces)
(defface nxml-element-colon
'((t (:inherit nxml-delimiter)))
"Face used for the colon in element names."
:group 'nxml-faces)
(defface nxml-element-local-name
'((t (:inherit font-lock-function-name-face)))
"Face used for the local name of elements."
:group 'nxml-faces)
(defface nxml-attribute-prefix
'((t (:inherit nxml-name)))
"Face used for the prefix of attributes."
:group 'nxml-faces)
(defface nxml-attribute-colon
'((t (:inherit nxml-delimiter)))
"Face used for the colon in attribute names."
:group 'nxml-faces)
(defface nxml-attribute-local-name
'((t (:inherit font-lock-variable-name-face)))
"Face used for the local name of attributes."
:group 'nxml-faces)
(defface nxml-namespace-attribute-xmlns
'((t (:inherit nxml-attribute-prefix)))
"Face used for `xmlns' in namespace attributes."
:group 'nxml-faces)
(defface nxml-namespace-attribute-colon
'((t (:inherit nxml-attribute-colon)))
"Face used for the colon in namespace attributes."
:group 'nxml-faces)
(defface nxml-namespace-attribute-prefix
'((t (:inherit nxml-attribute-local-name)))
"Face used for the prefix declared in namespace attributes."
:group 'nxml-faces)
(defface nxml-attribute-value
'((t (:inherit font-lock-string-face)))
"Face used for the value of attributes."
:group 'nxml-faces)
(defface nxml-attribute-value-delimiter
'((t (:inherit nxml-attribute-value)))
"Face used for the delimiters of attribute values."
:group 'nxml-faces)
(defface nxml-prolog-literal-delimiter
'((t (:inherit nxml-delimited-data)))
"Face used for the delimiters of literals in the prolog."
:group 'nxml-faces)
(defface nxml-prolog-literal-content
'((t (:inherit nxml-delimited-data)))
"Face used for the content of literals in the prolog."
:group 'nxml-faces)
(defface nxml-prolog-keyword
'((t (:inherit font-lock-keyword-face)))
"Face used for keywords in the prolog."
:group 'nxml-faces)
(defface nxml-markup-declaration-delimiter
'((t (:inherit nxml-delimiter)))
"Face used for the delimiters of markup declarations in the prolog.
The delimiters are ."
:group 'nxml-faces)
(defface nxml-hash
'((t (:inherit nxml-name)))
"Face used for # before a name in the prolog."
:group 'nxml-faces)
(defface nxml-glyph
'((((type x))
(:family
"misc-fixed"
:background
"light grey"
:foreground
"black"
:weight
normal
:slant
normal))
(t
(:background
"light grey"
:foreground
"black"
:weight
normal
:slant
normal)))
"Face used for glyph for char references."
:group 'nxml-faces)
;;; Global variables
(defvar-local nxml-parent-document nil
"The parent document for a part of a modular document.
Use `nxml-parent-document-set' to set it.")
(put 'nxml-parent-document 'safe-local-variable 'stringp)
(defvar-local nxml-prolog-regions nil
"List of regions in the prolog to be fontified.
See the function `xmltok-forward-prolog' for more information.")
(defvar-local nxml-degraded nil
"Non-nil if currently operating in degraded mode.
Degraded mode is enabled when an internal error is encountered in the
fontification or after-change functions.")
(defvar nxml-completion-hook nil
"Hook run by `nxml-complete'.
This hook is run until success.")
(defvar nxml-in-mixed-content-hook nil
"Hook to determine whether point is in mixed content.
The hook is called without arguments. It should return nil if it is
definitely not mixed; non-nil otherwise. The hook will be run until
one of the functions returns nil.")
(defvar nxml-mixed-scan-distance 4000
"Maximum distance from point to scan when checking for mixed content.")
(defvar nxml-end-tag-indent-scan-distance 4000
"Maximum distance from point to scan backwards when indenting end-tag.")
(defvar-local nxml-char-ref-extra-display t
"Non-nil means display extra information for character references.
The extra information consists of a tooltip with the character name
and, if `nxml-char-ref-display-glyph-flag' is non-nil, a glyph
corresponding to the referenced character following the character
reference.")
(defvar-keymap nxml-mode-map
:doc "Keymap for `nxml-mode'."
"C-M-u" #'nxml-backward-up-element
"C-M-d" #'nxml-down-element
"C-M-n" #'nxml-forward-element
"C-M-p" #'nxml-backward-element
"M-{" #'nxml-backward-paragraph
"M-}" #'nxml-forward-paragraph
"M-h" #'nxml-mark-paragraph
"C-c C-f" #'nxml-finish-element
"C-c ]" #'nxml-finish-element
"C-c /" #'nxml-finish-element
"C-c C-m" #'nxml-split-element
"C-c C-b" #'nxml-balanced-close-start-tag-block
"C-c C-i" #'nxml-balanced-close-start-tag-inline
"C-c C-x" #'nxml-insert-xml-declaration
"C-c C-d" #'nxml-dynamic-markup-word
;; u is for Unicode
"C-c C-u" #'nxml-insert-named-char
"C-c C-o" nxml-outline-prefix-map
"/" #'nxml-electric-slash
"S-" #'nxml-mouse-hide-direct-text-content)
(defvar nxml-font-lock-keywords
'(nxml-fontify-matcher)
"Default font lock keywords for `nxml-mode'.")
(defsubst nxml-set-face (start end face)
(when (and face (< start end))
;; Prepend, so the character reference highlighting takes precedence over
;; the string highlighting applied syntactically.
(font-lock-prepend-text-property start end 'face face)))
(defun nxml-parent-document-set (parent-document)
"Set `nxml-parent-document' and inherit the DTD &c."
;; FIXME: this does not work.
;; the idea is that by inheriting some variables from the parent,
;; `rng-validate-mode' will validate entities declared in the parent.
;; alas, the most interesting variables (`rng-compile-table' et al)
;; are circular and cannot be printed even with `print-circle'.
(interactive "fParent document")
(let (dtd current-schema current-schema-file-name compile-table
ipattern-table last-ipattern-index)
(when (string= (file-truename parent-document)
(file-truename buffer-file-name))
(error "Parent document cannot be the same as the document"))
(with-current-buffer (find-file-noselect parent-document)
(setq dtd rng-dtd
current-schema rng-current-schema
current-schema-file-name rng-current-schema-file-name
compile-table rng-compile-table
ipattern-table rng-ipattern-table
last-ipattern-index rng-last-ipattern-index
parent-document buffer-file-name))
(setq rng-dtd dtd
rng-current-schema current-schema
rng-current-schema-file-name current-schema-file-name
rng-compile-table compile-table
rng-ipattern-table ipattern-table
rng-last-ipattern-index last-ipattern-index
nxml-parent-document parent-document)
(message "Set parent document to %s" parent-document)
(when rng-validate-mode
(rng-validate-while-idle (current-buffer)))))
(defvar nxml-prolog-end) ;; nxml-rap.el
(defun nxml-syntax-propertize (start end)
"Syntactic keywords for `nxml-mode'."
;; Like `sgml-syntax-propertize', but rescan prolog if needed.
(when (< start nxml-prolog-end)
(nxml-scan-prolog))
(sgml-syntax-propertize start end))
(defvar tildify-space-string)
(defvar tildify-foreach-region-function)
;;;###autoload
(define-derived-mode nxml-mode text-mode "nXML"
;; We use C-c C-i instead of \\[nxml-balanced-close-start-tag-inline]
;; because Emacs turns C-c C-i into C-c TAB which is hard to type and
;; not mnemonic.
"Major mode for editing XML.
\\
\\[nxml-finish-element] finishes the current element by inserting an end-tag.
C-c C-i closes a start-tag with `>' and then inserts a balancing end-tag
leaving point between the start-tag and end-tag.
\\[nxml-balanced-close-start-tag-block] is similar but for block rather than inline elements:
the start-tag, point, and end-tag are all left on separate lines.
If `nxml-slash-auto-complete-flag' is non-nil, then inserting a `'
automatically inserts the rest of the end-tag.
\\[completion-at-point] performs completion on the symbol preceding point.
\\[nxml-dynamic-markup-word] uses the contents of the current buffer
to choose a tag to put around the word preceding point.
Sections of the document can be displayed in outline form. The
variable `nxml-section-element-name-regexp' controls when an element
is recognized as a section. The same key sequences that change
visibility in outline mode are used except that they start with C-c C-o
instead of C-c.
Validation is provided by the related minor-mode `rng-validate-mode'.
This also makes completion schema- and context- sensitive. Element
names, attribute names, attribute values and namespace URIs can all be
completed. By default, `rng-validate-mode' is automatically enabled.
You can toggle it using \\[rng-validate-mode] or change the default by
customizing `rng-nxml-auto-validate-flag'.
\\[indent-for-tab-command] indents the current line appropriately.
This can be customized using the variable `nxml-child-indent'
and the variable `nxml-attribute-indent'.
\\[nxml-insert-named-char] inserts a character reference using
the character's name (by default, the Unicode name).
\\[universal-argument] \\[nxml-insert-named-char] inserts the character directly.
The Emacs commands that normally operate on balanced expressions will
operate on XML markup items. Thus \\[forward-sexp] will move forward
across one markup item; \\[backward-sexp] will move backward across
one markup item; \\[kill-sexp] will kill the following markup item;
\\[mark-sexp] will mark the following markup item. By default, the
complete element is treated as a single markup item; to make each tag be
treated as a separate markup item, set the variable `nxml-sexp-element-flag'
to nil. For more details, see the function `nxml-forward-balanced-item'.
\\[nxml-backward-up-element] and \\[nxml-down-element] move up and down the element structure.
Many aspects this mode can be customized using
\\[customize-group] nxml RET."
;; (kill-all-local-variables)
;; If encoding does not allow non-break space character, use reference.
;; FIXME: This duplicates code from sgml-mode, perhaps derive from it?
;; FIXME: Perhaps use if possible (e.g. XHTML)?
(setq-local tildify-space-string
(if (equal (decode-coding-string
(encode-coding-string " " buffer-file-coding-system)
buffer-file-coding-system) " ")
" " " "))
;; FIXME: Use the fact that we're parsing the document already
;; rather than using regex-based filtering.
(setq-local tildify-foreach-region-function
(apply-partially 'tildify-foreach-ignore-environments
'(("") ("<" . ">"))))
(setq-local mode-line-process '((nxml-degraded "/degraded")))
;; We'll determine the fill prefix ourselves
(setq-local adaptive-fill-mode nil)
(setq-local forward-sexp-function #'nxml-forward-balanced-item)
(setq-local indent-line-function #'nxml-indent-line)
(setq-local fill-paragraph-function #'nxml-do-fill-paragraph)
;; Comment support
;; This doesn't seem to work too well;
;; I think we should probably roll our own nxml-comment-dwim function.
(setq-local comment-indent-function #'nxml-indent-line)
(setq-local comment-start "")
(setq-local comment-end-skip "[ \t\r\n]*-->")
(setq-local comment-line-break-function #'nxml-newline-and-indent)
(setq-local comment-quote-nested-function #'nxml-comment-quote-nested)
(setq-local comment-continue "") ; avoid double-hyphens as a padding
(setq-local hs-block-start-regexp "<[^/>]*?")
(setq-local hs-block-end-regexp "[^/>]*[^/]>")
(setq-local hs-c-start-regexp ""))
((eq xmltok-type 'cdata-section)
(nxml-compute-indent-in-delimited-token pos ""))
((eq xmltok-type 'processing-instruction)
(nxml-compute-indent-in-delimited-token pos "" "?>"))
(t
(goto-char pos)
(if (and (= (forward-line -1) 0)
(< xmltok-start (point)))
(back-to-indentation)
(goto-char xmltok-start))
(current-column))))
(defun nxml-compute-indent-in-start-tag (pos)
"Return the indent for a line that starts inside a start-tag.
Also for a line that starts inside an empty element.
POS is the position of the first non-whitespace character of the line.
This expects the xmltok-* variables to be set up as by `xmltok-forward'."
(let ((value-boundary (nxml-attribute-value-boundary pos))
(off 0))
(if value-boundary
;; inside an attribute value
(let ((value-start (car value-boundary)))
(goto-char pos)
(forward-line -1)
(if (< (point) value-start)
(goto-char value-start)
(back-to-indentation)))
;; outside an attribute value
(goto-char pos)
(while (and (= (forward-line -1) 0)
(nxml-attribute-value-boundary (point))))
(cond ((<= (point) xmltok-start)
(goto-char xmltok-start)
(setq off nxml-attribute-indent)
(let ((atts (xmltok-merge-attributes)))
(when atts
(let* ((att (car atts))
(start (xmltok-attribute-name-start att)))
(when (< start pos)
(goto-char start)
(setq off 0))))))
(t
(back-to-indentation))))
(+ (current-column) off)))
(defun nxml-attribute-value-boundary (pos)
"Return a pair (START . END) if POS is inside an attribute value.
Otherwise return nil. START and END are the positions of the start
and end of the attribute value containing POS. This expects the
xmltok-* variables to be set up as by `xmltok-forward'."
(let ((atts (xmltok-merge-attributes))
att value-start value-end value-boundary)
(while atts
(setq att (car atts))
(setq value-start (xmltok-attribute-value-start att))
(setq value-end (xmltok-attribute-value-end att))
(cond ((and value-start (< pos value-start))
(setq atts nil))
((and value-start value-end (<= pos value-end))
(setq value-boundary (cons value-start value-end))
(setq atts nil))
(t (setq atts (cdr atts)))))
value-boundary))
(defun nxml-compute-indent-in-delimited-token (pos open-delim close-delim)
"Return the indent for a line that starts inside a token with delimiters.
OPEN-DELIM and CLOSE-DELIM are strings giving the opening and closing
delimiters. POS is the position of the first non-whitespace character
of the line. This expects the xmltok-* variables to be set up as by
`xmltok-forward'."
(cond ((string= open-delim " arg 0)
(while (progn
(nxml-forward-single-balanced-item)
(> (setq arg (1- arg)) 0))))
((< arg 0)
(while (progn
(nxml-backward-single-balanced-item)
(< (setq arg (1+ arg)) 0))))))
(defun nxml-forward-single-balanced-item ()
(condition-case err
(goto-char (let ((end (nxml-token-after)))
(save-excursion
(while (eq xmltok-type 'space)
(goto-char end)
(setq end (nxml-token-after)))
(cond ((/= (point) xmltok-start)
(nxml-scan-forward-within end))
((and nxml-sexp-element-flag
(eq xmltok-type 'start-tag))
;; can't ever return nil here
(nxml-scan-element-forward xmltok-start))
((and nxml-sexp-element-flag
(memq xmltok-type
'(end-tag partial-end-tag)))
(error "Already at end of element"))
(t end)))))
(nxml-scan-error
(goto-char (cadr err))
(apply #'error (cddr err)))))
(defun nxml-backward-single-balanced-item ()
(condition-case err
(goto-char (let ((end (nxml-token-before)))
(save-excursion
(while (eq xmltok-type 'space)
(goto-char xmltok-start)
(setq end (nxml-token-before)))
(cond ((/= (point) end)
(nxml-scan-backward-within end))
((and nxml-sexp-element-flag
(eq xmltok-type 'end-tag))
;; can't ever return nil here
(nxml-scan-element-backward end)
xmltok-start)
((and nxml-sexp-element-flag
(eq xmltok-type 'start-tag))
(error "Already at start of element"))
(t xmltok-start)))))
(nxml-scan-error
(goto-char (cadr err))
(apply #'error (cddr err)))))
(defun nxml-scan-forward-within (end)
(setq end (- end (nxml-end-delimiter-length xmltok-type)))
(when (<= end (point))
(error "Already at end of %s"
(nxml-token-type-friendly-name xmltok-type)))
(cond ((memq xmltok-type '(start-tag
empty-element
partial-start-tag
partial-empty-element))
(if (< (point) xmltok-name-end)
xmltok-name-end
(let ((att (nxml-find-following-attribute)))
(cond ((not att) end)
((and (xmltok-attribute-value-start att)
(<= (xmltok-attribute-value-start att)
(point)))
(nxml-scan-forward-in-attribute-value att))
((xmltok-attribute-value-end att)
(1+ (xmltok-attribute-value-end att)))
((save-excursion
(goto-char (xmltok-attribute-name-end att))
(looking-at "[ \t\r\n]*="))
(match-end 0))
(t (xmltok-attribute-name-end att))))))
((and (eq xmltok-type 'processing-instruction)
(< (point) xmltok-name-end))
xmltok-name-end)
(t end)))
(defun nxml-scan-backward-within (_end)
(setq xmltok-start
(+ xmltok-start
(nxml-start-delimiter-length xmltok-type)))
(when (<= (point) xmltok-start)
(error "Already at start of %s"
(nxml-token-type-friendly-name xmltok-type)))
(cond ((memq xmltok-type '(start-tag
empty-element
partial-start-tag
partial-empty-element))
(let ((att (nxml-find-preceding-attribute)))
(cond ((not att) xmltok-start)
((and (xmltok-attribute-value-start att)
(<= (xmltok-attribute-value-start att)
(point))
(<= (point)
(xmltok-attribute-value-end att)))
(nxml-scan-backward-in-attribute-value att))
(t (xmltok-attribute-name-start att)))))
((and (eq xmltok-type 'processing-instruction)
(let ((content-start (save-excursion
(goto-char xmltok-name-end)
(skip-chars-forward " \r\t\n")
(point))))
(and (< content-start (point))
content-start))))
(t xmltok-start)))
(defun nxml-scan-forward-in-attribute-value (att)
(when (= (point) (xmltok-attribute-value-end att))
(error "Already at end of attribute value"))
(let ((refs (xmltok-attribute-refs att))
ref)
(while refs
(setq ref (car refs))
(if (< (point) (aref ref 2))
(setq refs nil)
(setq ref nil)
(setq refs (cdr refs))))
(cond ((not ref)
(xmltok-attribute-value-end att))
((< (point) (aref ref 1))
(aref ref 1))
((= (point) (aref ref 1))
(aref ref 2))
(t
(let ((end (- (aref ref 2)
(nxml-end-delimiter-length (aref ref 0)))))
(if (< (point) end)
end
(error "Already at end of %s"
(nxml-token-type-friendly-name (aref ref 0)))))))))
(defun nxml-scan-backward-in-attribute-value (att)
(when (= (point) (xmltok-attribute-value-start att))
(error "Already at start of attribute value"))
(let ((refs (reverse (xmltok-attribute-refs att)))
ref)
(while refs
(setq ref (car refs))
(if (< (aref ref 1) (point))
(setq refs nil)
(setq ref nil)
(setq refs (cdr refs))))
(cond ((not ref)
(xmltok-attribute-value-start att))
((< (aref ref 2) (point))
(aref ref 2))
((= (point) (aref ref 2))
(aref ref 1))
(t
(let ((start (+ (aref ref 1)
(nxml-start-delimiter-length (aref ref 0)))))
(if (< start (point))
start
(error "Already at start of %s"
(nxml-token-type-friendly-name (aref ref 0)))))))))
(defun nxml-find-following-attribute ()
(let ((ret nil)
(atts (or xmltok-attributes xmltok-namespace-attributes))
(more-atts (and xmltok-attributes xmltok-namespace-attributes)))
(while atts
(let* ((att (car atts))
(name-start (xmltok-attribute-name-start att)))
(cond ((and (<= name-start (point))
(xmltok-attribute-value-end att)
;; <= because end is before quote
(<= (point) (xmltok-attribute-value-end att)))
(setq atts nil)
(setq ret att))
((and (< (point) name-start)
(or (not ret)
(< name-start
(xmltok-attribute-name-start ret))))
(setq ret att))))
(setq atts (cdr atts))
(unless atts
(setq atts more-atts)
(setq more-atts nil)))
ret))
(defun nxml-find-preceding-attribute ()
(let ((ret nil)
(atts (or xmltok-attributes xmltok-namespace-attributes))
(more-atts (and xmltok-attributes xmltok-namespace-attributes)))
(while atts
(let* ((att (car atts))
(name-start (xmltok-attribute-name-start att)))
(cond ((and (< name-start (point))
(xmltok-attribute-value-end att)
;; <= because end is before quote
(<= (point) (xmltok-attribute-value-end att)))
(setq atts nil)
(setq ret att))
((and (< name-start (point))
(or (not ret)
(< (xmltok-attribute-name-start ret)
name-start)))
(setq ret att))))
(setq atts (cdr atts))
(unless atts
(setq atts more-atts)
(setq more-atts nil)))
ret))
(defun nxml-up-element (&optional arg)
(interactive "^p")
(or arg (setq arg 1))
(if (< arg 0)
(nxml-backward-up-element (- arg))
(condition-case err
(while (and (> arg 0)
(< (point) (point-max)))
(let ((token-end (nxml-token-after)))
(goto-char (cond ((or (memq xmltok-type '(end-tag
partial-end-tag))
(and (memq xmltok-type
'(empty-element
partial-empty-element))
(< xmltok-start (point))))
token-end)
((nxml-scan-element-forward
(if (and (eq xmltok-type 'start-tag)
(= (point) xmltok-start))
xmltok-start
token-end)
t))
(t (error "No parent element")))))
(setq arg (1- arg)))
(nxml-scan-error
(goto-char (cadr err))
(apply #'error (cddr err))))))
(defun nxml-backward-up-element (&optional arg)
(interactive "^p")
(or arg (setq arg 1))
(if (< arg 0)
(nxml-up-element (- arg))
(condition-case err
(while (and (> arg 0)
(< (point-min) (point)))
(let ((token-end (nxml-token-before)))
(goto-char (cond ((or (memq xmltok-type '(start-tag
partial-start-tag))
(and (memq xmltok-type
'(empty-element
partial-empty-element))
(< (point) token-end)))
xmltok-start)
((nxml-scan-element-backward
(if (and (eq xmltok-type 'end-tag)
(= (point) token-end))
token-end
xmltok-start)
t)
xmltok-start)
(t (error "No parent element")))))
(setq arg (1- arg)))
(nxml-scan-error
(goto-char (cadr err))
(apply #'error (cddr err))))))
(defun nxml-down-element (&optional arg)
"Move forward down into the content of an element.
With ARG, do this that many times.
Negative ARG means move backward but still down."
(interactive "^p")
(or arg (setq arg 1))
(if (< arg 0)
(nxml-backward-down-element (- arg))
(while (> arg 0)
(goto-char
(let ((token-end (nxml-token-after)))
(save-excursion
(goto-char token-end)
(while (progn
(when (memq xmltok-type '(nil end-tag partial-end-tag))
(error "No following start-tags in this element"))
(not (memq xmltok-type '(start-tag partial-start-tag))))
(nxml-tokenize-forward))
(point))))
(setq arg (1- arg)))))
(defun nxml-backward-down-element (&optional arg)
(interactive "^p")
(or arg (setq arg 1))
(if (< arg 0)
(nxml-down-element (- arg))
(while (> arg 0)
(goto-char
(save-excursion
(nxml-token-before)
(goto-char xmltok-start)
(while (progn
(when (memq xmltok-type '(start-tag
partial-start-tag
prolog
nil))
(error "No preceding end-tags in this element"))
(not (memq xmltok-type '(end-tag partial-end-tag))))
(if (or (<= (point) nxml-prolog-end)
(not (search-backward "<" nxml-prolog-end t)))
(setq xmltok-type nil)
(nxml-move-outside-backwards)
(xmltok-forward)))
xmltok-start))
(setq arg (1- arg)))))
(defun nxml-forward-element (&optional arg)
"Move forward over one element.
With ARG, do it that many times.
Negative ARG means move backward."
(interactive "^p")
(or arg (setq arg 1))
(if (< arg 0)
(nxml-backward-element (- arg))
(condition-case err
(while (and (> arg 0)
(< (point) (point-max)))
(goto-char
(or (nxml-scan-element-forward (nxml-token-before))
(error "No more elements")))
(setq arg (1- arg)))
(nxml-scan-error
(goto-char (cadr err))
(apply #'error (cddr err))))))
(defun nxml-backward-element (&optional arg)
"Move backward over one element.
With ARG, do it that many times.
Negative ARG means move forward."
(interactive "^p")
(or arg (setq arg 1))
(if (< arg 0)
(nxml-forward-element (- arg))
(condition-case err
(while (and (> arg 0)
(< (point-min) (point)))
(goto-char
(or (and (nxml-scan-element-backward (progn
(nxml-token-after)
xmltok-start))
xmltok-start)
(error "No preceding elements")))
(setq arg (1- arg)))
(nxml-scan-error
(goto-char (cadr err))
(apply #'error (cddr err))))))
(defun nxml-mark-token-after ()
(interactive)
(push-mark (nxml-token-after) nil t)
(goto-char xmltok-start)
(message "Marked %s" xmltok-type))
;;; Paragraphs
(defun nxml-mark-paragraph ()
"Put point at beginning of this paragraph, mark at end.
The paragraph marked is the one that contains point or follows point."
(interactive)
(nxml-forward-paragraph)
(push-mark nil t t)
(nxml-backward-paragraph))
(defun nxml-forward-paragraph (&optional arg)
(interactive "^p")
(or arg (setq arg 1))
(cond ((< arg 0)
(nxml-backward-paragraph (- arg)))
((> arg 0)
(forward-line 0)
(while (and (nxml-forward-single-paragraph)
(> (setq arg (1- arg)) 0))))))
(defun nxml-backward-paragraph (&optional arg)
(interactive "^p")
(or arg (setq arg 1))
(cond ((< arg 0)
(nxml-forward-paragraph (- arg)))
((> arg 0)
(unless (bolp)
(let ((inhibit-field-text-motion t))
(end-of-line)))
(while (and (nxml-backward-single-paragraph)
(> (setq arg (1- arg)) 0))))))
(defun nxml-forward-single-paragraph ()
"Move forward over a single paragraph.
Return nil at end of buffer, t otherwise."
(let* ((token-end (nxml-token-after))
(offset (- (point) xmltok-start))
pos had-data)
(goto-char token-end)
(while (and (< (point) (point-max))
(not (setq pos
(nxml-paragraph-end-pos had-data offset))))
(when (nxml-token-contains-data-p offset)
(setq had-data t))
(nxml-tokenize-forward)
(setq offset 0))
(when pos (goto-char pos))))
(defun nxml-backward-single-paragraph ()
"Move backward over a single paragraph.
Return nil at start of buffer, t otherwise."
(let* ((token-end (nxml-token-before))
(offset (- token-end (point)))
(last-tag-pos xmltok-start)
pos had-data last-data-pos)
(goto-char token-end)
(unless (setq pos (nxml-paragraph-start-pos nil offset))
(setq had-data (nxml-token-contains-data-p nil offset))
(goto-char xmltok-start)
(while (and (not pos) (< (point-min) (point)))
(cond ((search-backward "<" nxml-prolog-end t)
(nxml-move-outside-backwards)
(save-excursion
(while (< (point) last-tag-pos)
(xmltok-forward)
(when (and (not had-data) (nxml-token-contains-data-p))
(setq pos nil)
(setq last-data-pos xmltok-start))
(let ((tem (nxml-paragraph-start-pos had-data 0)))
(when tem (setq pos tem)))))
(when (and (not had-data) last-data-pos (not pos))
(setq had-data t)
(save-excursion
(while (< (point) last-data-pos)
(xmltok-forward))
(let ((tem (nxml-paragraph-start-pos had-data 0)))
(when tem (setq pos tem)))))
(setq last-tag-pos (point)))
(t (goto-char (point-min))))))
(when pos (goto-char pos))))
(defun nxml-token-contains-data-p (&optional start end)
(setq start (+ xmltok-start (or start 0)))
(setq end (- (point) (or end 0)))
(when (eq xmltok-type 'cdata-section)
(setq start (max start (+ xmltok-start 9)))
(setq end (min end (- (point) 3))))
(or (and (eq xmltok-type 'data)
(eq start xmltok-start)
(eq end (point)))
(eq xmltok-type 'char-ref)
(and (memq xmltok-type '(data cdata-section))
(< start end)
(save-excursion
(goto-char start)
(re-search-forward "[^ \t\r\n]" end t)))))
(defun nxml-paragraph-end-pos (had-data offset)
"Return the position of the paragraph end if contained in the current token.
Return nil if the current token does not contain the paragraph end.
Only characters after OFFSET from the start of the token are eligible.
HAD-DATA says whether there have been non-whitespace data characters yet."
(cond ((not had-data)
(cond ((memq xmltok-type '(data cdata-section))
(save-excursion
(let ((end (point)))
(goto-char (+ xmltok-start
(max (if (eq xmltok-type 'cdata-section)
9
0)
offset)))
(and (re-search-forward "[^ \t\r\n]" end t)
(re-search-forward "^[ \t]*$" end t)
(match-beginning 0)))))
((and (eq xmltok-type 'comment)
(nxml-token-begins-line-p)
(nxml-token-ends-line-p))
(save-excursion
(let ((end (point)))
(goto-char (+ xmltok-start (max 4 offset)))
(when (re-search-forward "[^ \t\r\n]" (- end 3) t)
(if (re-search-forward "^[ \t]*$" end t)
(match-beginning 0)
(goto-char (- end 3))
(skip-chars-backward " \t")
(unless (bolp)
(beginning-of-line 2))
(point))))))))
((memq xmltok-type '(data space cdata-section))
(save-excursion
(let ((end (point)))
(goto-char (+ xmltok-start offset))
(and (re-search-forward "^[ \t]*$" end t)
(match-beginning 0)))))
((and (memq xmltok-type '(start-tag
end-tag
empty-element
comment
processing-instruction
entity-ref))
(nxml-token-begins-line-p)
(nxml-token-ends-line-p))
(save-excursion
(goto-char xmltok-start)
(skip-chars-backward " \t")
(point)))
((and (eq xmltok-type 'end-tag)
(looking-at "[ \t]*$")
(not (nxml-in-mixed-content-p t)))
(save-excursion
(or (search-forward "\n" nil t)
(point-max))))))
(defun nxml-paragraph-start-pos (had-data offset)
"Return the position of the paragraph start if contained in the current token.
Return nil if the current token does not contain the paragraph start.
Only characters before OFFSET from the end of the token are eligible.
HAD-DATA says whether there have been non-whitespace data characters yet."
(cond ((not had-data)
(cond ((memq xmltok-type '(data cdata-section))
(save-excursion
(goto-char (- (point)
(max (if (eq xmltok-type 'cdata-section)
3
0)
offset)))
(and (re-search-backward "[^ \t\r\n]" xmltok-start t)
(re-search-backward "^[ \t]*$" xmltok-start t)
(match-beginning 0))))
((and (eq xmltok-type 'comment)
(nxml-token-ends-line-p)
(nxml-token-begins-line-p))
(save-excursion
(goto-char (- (point) (max 3 offset)))
(when (and (< (+ xmltok-start 4) (point))
(re-search-backward "[^ \t\r\n]"
(+ xmltok-start 4)
t))
(if (re-search-backward "^[ \t]*$" xmltok-start t)
(match-beginning 0)
(goto-char xmltok-start)
(if (looking-at "