;;; nxml-mode.el --- a new XML mode -*- lexical-binding:t -*- ;; Copyright (C) 2003-2004, 2007-2025 Free Software Foundation, Inc. ;; Author: James Clark ;; Keywords: text, hypermedia, languages, XML ;; This file is part of GNU Emacs. ;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs. If not, see . ;;; Commentary: ;; See nxml-rap.el for description of parsing strategy. ;;; Code: (eval-when-compile (require 'cl-lib)) (require 'xmltok) (require 'nxml-enc) (require 'nxml-util) (require 'nxml-rap) (require 'nxml-outln) ;; nxml-mode calls rng-nxml-mode-init, which is autoloaded from rng-nxml. ;; So we might as well just require it and silence the compiler. (provide 'nxml-mode) ; avoid recursive require (require 'rng-nxml) (require 'sgml-mode) ;;; Customization (defgroup nxml nil "New XML editing mode." :link '(custom-manual "(nxml-mode) Top") :group 'languages) (defgroup nxml-faces nil "Faces for XML syntax highlighting." :group 'nxml) (defcustom nxml-char-ref-display-glyph-flag t "Non-nil means display glyph following character reference. The glyph is displayed in face `nxml-glyph'." :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-sexp-element-flag t "Non-nil means sexp commands treat an element as a single expression." :version "27.1" ; nil -> t :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-slash-auto-complete-flag nil "Non-nil means typing a slash automatically completes the end-tag. This is used by `nxml-electric-slash'." :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-child-indent 2 "Indentation for the children of an element relative to the start-tag. This only applies when the line or lines containing the start-tag contains nothing else other than that start-tag." :group 'nxml :type 'integer :safe #'integerp) (defcustom nxml-attribute-indent 4 "Indentation for the attributes of an element relative to the start-tag. This only applies when the first attribute of a tag starts a line. In other cases, the first attribute on one line is indented the same as the first attribute on the previous line." :group 'nxml :type 'integer :safe #'integerp) (defcustom nxml-bind-meta-tab-to-complete-flag t "Non-nil means to use nXML completion in \\[completion-at-point]." :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-prefer-utf-16-to-utf-8-flag nil "Non-nil means prefer UTF-16 to UTF-8 when saving a buffer. This is used only when a buffer does not contain an encoding declaration and when its current `buffer-file-coding-system' specifies neither UTF-16 nor UTF-8." :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-prefer-utf-16-little-to-big-endian-flag (eq system-type 'windows-nt) "Non-nil means prefer little-endian to big-endian byte-order for UTF-16. This is used only for saving a buffer; when reading the byte-order is auto-detected. It may be relevant both when there is no encoding declaration and when the encoding declaration specifies `UTF-16'." :group 'nxml :type 'boolean :safe #'booleanp) (defcustom nxml-default-buffer-file-coding-system nil "Default value for `buffer-file-coding-system' for a buffer for a new file. A value of nil means use the default value of `buffer-file-coding-system' as normal. A buffer's `buffer-file-coding-system' affects what \\[nxml-insert-xml-declaration] inserts." :group 'nxml :type 'coding-system :safe #'coding-system-p) (defcustom nxml-auto-insert-xml-declaration-flag nil "Non-nil means automatically insert an XML declaration in a new file. The XML declaration is inserted using `nxml-insert-xml-declaration'." :group 'nxml :type 'boolean :safe #'booleanp) (defface nxml-delimited-data '((t (:inherit font-lock-doc-face))) "Face used to highlight data enclosed between delimiters. This is not used directly, but only via inheritance by other faces." :group 'nxml-faces) (defface nxml-name '((t (:inherit font-lock-builtin-face))) "Face used to highlight various names. This includes element and attribute names, processing instruction targets and the CDATA keyword in a CDATA section. This is not used directly, but only via inheritance by other faces." :group 'nxml-faces) (defface nxml-ref '((t (:inherit font-lock-constant-face))) "Face used to highlight character and entity references. This is not used directly, but only via inheritance by other faces." :group 'nxml-faces) (defface nxml-text '((t (:inherit default))) "Face used to highlight text." :group 'nxml-faces) (defface nxml-delimiter '((t (:inherit nxml-text))) "Face used to highlight delimiters. This is not used directly, but only via inheritance by other faces." :group 'nxml-faces) (defface nxml-processing-instruction-delimiter '((t (:inherit nxml-delimiter))) "Face used for the delimiters of processing instructions, i.e., ." :group 'nxml-faces) (defface nxml-processing-instruction-target '((t (:inherit font-lock-keyword-face))) "Face used for the target of processing instructions." :group 'nxml-faces) (defface nxml-processing-instruction-content '((t (:inherit nxml-delimited-data))) "Face used for the content of processing instructions." :group 'nxml-faces) (defface nxml-cdata-section-delimiter '((t (:inherit nxml-delimiter))) "Face used for the delimiters of CDATA sections, i.e., ." :group 'nxml-faces) (defface nxml-cdata-section-CDATA '((t (:inherit nxml-name))) "Face used for the CDATA keyword in CDATA sections." :group 'nxml-faces) (defface nxml-cdata-section-content '((t (:inherit nxml-text))) "Face used for the content of CDATA sections." :group 'nxml-faces) (defface nxml-char-ref-number '((t (:inherit nxml-ref))) "Face used for the number in character references. This includes the `x' in hex references." :group 'nxml-faces) (defface nxml-char-ref-delimiter '((t (:inherit nxml-ref))) "Face used for the delimiters of character references, i.e., &# and ;." :group 'nxml-faces) (defface nxml-entity-ref-name '((t (:inherit nxml-ref))) "Face used for the entity name in general entity references." :group 'nxml-faces) (defface nxml-entity-ref-delimiter '((t (:inherit nxml-ref))) "Face used for the delimiters of entity references, i.e., & and ;." :group 'nxml-faces) (defface nxml-tag-delimiter '((t (:inherit nxml-delimiter))) "Face used for the angle brackets delimiting tags. `nxml-tag-slash' is used for slashes." :group 'nxml-faces) (defface nxml-tag-slash '((t (:inherit nxml-tag-delimiter))) "Face used for slashes in tags, both in end-tags and empty-elements." :group 'nxml-faces) (defface nxml-element-prefix '((t (:inherit nxml-name))) "Face used for the prefix of elements." :group 'nxml-faces) (defface nxml-element-colon '((t (:inherit nxml-delimiter))) "Face used for the colon in element names." :group 'nxml-faces) (defface nxml-element-local-name '((t (:inherit font-lock-function-name-face))) "Face used for the local name of elements." :group 'nxml-faces) (defface nxml-attribute-prefix '((t (:inherit nxml-name))) "Face used for the prefix of attributes." :group 'nxml-faces) (defface nxml-attribute-colon '((t (:inherit nxml-delimiter))) "Face used for the colon in attribute names." :group 'nxml-faces) (defface nxml-attribute-local-name '((t (:inherit font-lock-variable-name-face))) "Face used for the local name of attributes." :group 'nxml-faces) (defface nxml-namespace-attribute-xmlns '((t (:inherit nxml-attribute-prefix))) "Face used for `xmlns' in namespace attributes." :group 'nxml-faces) (defface nxml-namespace-attribute-colon '((t (:inherit nxml-attribute-colon))) "Face used for the colon in namespace attributes." :group 'nxml-faces) (defface nxml-namespace-attribute-prefix '((t (:inherit nxml-attribute-local-name))) "Face used for the prefix declared in namespace attributes." :group 'nxml-faces) (defface nxml-attribute-value '((t (:inherit font-lock-string-face))) "Face used for the value of attributes." :group 'nxml-faces) (defface nxml-attribute-value-delimiter '((t (:inherit nxml-attribute-value))) "Face used for the delimiters of attribute values." :group 'nxml-faces) (defface nxml-prolog-literal-delimiter '((t (:inherit nxml-delimited-data))) "Face used for the delimiters of literals in the prolog." :group 'nxml-faces) (defface nxml-prolog-literal-content '((t (:inherit nxml-delimited-data))) "Face used for the content of literals in the prolog." :group 'nxml-faces) (defface nxml-prolog-keyword '((t (:inherit font-lock-keyword-face))) "Face used for keywords in the prolog." :group 'nxml-faces) (defface nxml-markup-declaration-delimiter '((t (:inherit nxml-delimiter))) "Face used for the delimiters of markup declarations in the prolog. The delimiters are ." :group 'nxml-faces) (defface nxml-hash '((t (:inherit nxml-name))) "Face used for # before a name in the prolog." :group 'nxml-faces) (defface nxml-glyph '((((type x)) (:family "misc-fixed" :background "light grey" :foreground "black" :weight normal :slant normal)) (t (:background "light grey" :foreground "black" :weight normal :slant normal))) "Face used for glyph for char references." :group 'nxml-faces) ;;; Global variables (defvar-local nxml-parent-document nil "The parent document for a part of a modular document. Use `nxml-parent-document-set' to set it.") (put 'nxml-parent-document 'safe-local-variable 'stringp) (defvar-local nxml-prolog-regions nil "List of regions in the prolog to be fontified. See the function `xmltok-forward-prolog' for more information.") (defvar-local nxml-degraded nil "Non-nil if currently operating in degraded mode. Degraded mode is enabled when an internal error is encountered in the fontification or after-change functions.") (defvar nxml-completion-hook nil "Hook run by `nxml-complete'. This hook is run until success.") (defvar nxml-in-mixed-content-hook nil "Hook to determine whether point is in mixed content. The hook is called without arguments. It should return nil if it is definitely not mixed; non-nil otherwise. The hook will be run until one of the functions returns nil.") (defvar nxml-mixed-scan-distance 4000 "Maximum distance from point to scan when checking for mixed content.") (defvar nxml-end-tag-indent-scan-distance 4000 "Maximum distance from point to scan backwards when indenting end-tag.") (defvar-local nxml-char-ref-extra-display t "Non-nil means display extra information for character references. The extra information consists of a tooltip with the character name and, if `nxml-char-ref-display-glyph-flag' is non-nil, a glyph corresponding to the referenced character following the character reference.") (defvar-keymap nxml-mode-map :doc "Keymap for `nxml-mode'." "C-M-u" #'nxml-backward-up-element "C-M-d" #'nxml-down-element "C-M-n" #'nxml-forward-element "C-M-p" #'nxml-backward-element "M-{" #'nxml-backward-paragraph "M-}" #'nxml-forward-paragraph "M-h" #'nxml-mark-paragraph "C-c C-f" #'nxml-finish-element "C-c ]" #'nxml-finish-element "C-c /" #'nxml-finish-element "C-c C-m" #'nxml-split-element "C-c C-b" #'nxml-balanced-close-start-tag-block "C-c C-i" #'nxml-balanced-close-start-tag-inline "C-c C-x" #'nxml-insert-xml-declaration "C-c C-d" #'nxml-dynamic-markup-word ;; u is for Unicode "C-c C-u" #'nxml-insert-named-char "C-c C-o" nxml-outline-prefix-map "/" #'nxml-electric-slash "S-" #'nxml-mouse-hide-direct-text-content) (defvar nxml-font-lock-keywords '(nxml-fontify-matcher) "Default font lock keywords for `nxml-mode'.") (defsubst nxml-set-face (start end face) (when (and face (< start end)) ;; Prepend, so the character reference highlighting takes precedence over ;; the string highlighting applied syntactically. (font-lock-prepend-text-property start end 'face face))) (defun nxml-parent-document-set (parent-document) "Set `nxml-parent-document' and inherit the DTD &c." ;; FIXME: this does not work. ;; the idea is that by inheriting some variables from the parent, ;; `rng-validate-mode' will validate entities declared in the parent. ;; alas, the most interesting variables (`rng-compile-table' et al) ;; are circular and cannot be printed even with `print-circle'. (interactive "fParent document") (let (dtd current-schema current-schema-file-name compile-table ipattern-table last-ipattern-index) (when (string= (file-truename parent-document) (file-truename buffer-file-name)) (error "Parent document cannot be the same as the document")) (with-current-buffer (find-file-noselect parent-document) (setq dtd rng-dtd current-schema rng-current-schema current-schema-file-name rng-current-schema-file-name compile-table rng-compile-table ipattern-table rng-ipattern-table last-ipattern-index rng-last-ipattern-index parent-document buffer-file-name)) (setq rng-dtd dtd rng-current-schema current-schema rng-current-schema-file-name current-schema-file-name rng-compile-table compile-table rng-ipattern-table ipattern-table rng-last-ipattern-index last-ipattern-index nxml-parent-document parent-document) (message "Set parent document to %s" parent-document) (when rng-validate-mode (rng-validate-while-idle (current-buffer))))) (defvar nxml-prolog-end) ;; nxml-rap.el (defun nxml-syntax-propertize (start end) "Syntactic keywords for `nxml-mode'." ;; Like `sgml-syntax-propertize', but rescan prolog if needed. (when (< start nxml-prolog-end) (nxml-scan-prolog)) (sgml-syntax-propertize start end)) (defvar tildify-space-string) (defvar tildify-foreach-region-function) ;;;###autoload (define-derived-mode nxml-mode text-mode "nXML" ;; We use C-c C-i instead of \\[nxml-balanced-close-start-tag-inline] ;; because Emacs turns C-c C-i into C-c TAB which is hard to type and ;; not mnemonic. "Major mode for editing XML. \\ \\[nxml-finish-element] finishes the current element by inserting an end-tag. C-c C-i closes a start-tag with `>' and then inserts a balancing end-tag leaving point between the start-tag and end-tag. \\[nxml-balanced-close-start-tag-block] is similar but for block rather than inline elements: the start-tag, point, and end-tag are all left on separate lines. If `nxml-slash-auto-complete-flag' is non-nil, then inserting a `") ("<" . ">")))) (setq-local mode-line-process '((nxml-degraded "/degraded"))) ;; We'll determine the fill prefix ourselves (setq-local adaptive-fill-mode nil) (setq-local forward-sexp-function #'nxml-forward-balanced-item) (setq-local indent-line-function #'nxml-indent-line) (setq-local fill-paragraph-function #'nxml-do-fill-paragraph) ;; Comment support ;; This doesn't seem to work too well; ;; I think we should probably roll our own nxml-comment-dwim function. (setq-local comment-indent-function #'nxml-indent-line) (setq-local comment-start "") (setq-local comment-end-skip "[ \t\r\n]*-->") (setq-local comment-line-break-function #'nxml-newline-and-indent) (setq-local comment-quote-nested-function #'nxml-comment-quote-nested) (setq-local comment-continue "") ; avoid double-hyphens as a padding (setq-local hs-block-start-regexp "<[^/>]*?") (setq-local hs-block-end-regexp "]*[^/]>") (setq-local hs-c-start-regexp "")) ((eq xmltok-type 'cdata-section) (nxml-compute-indent-in-delimited-token pos "")) ((eq xmltok-type 'processing-instruction) (nxml-compute-indent-in-delimited-token pos "")) (t (goto-char pos) (if (and (= (forward-line -1) 0) (< xmltok-start (point))) (back-to-indentation) (goto-char xmltok-start)) (current-column)))) (defun nxml-compute-indent-in-start-tag (pos) "Return the indent for a line that starts inside a start-tag. Also for a line that starts inside an empty element. POS is the position of the first non-whitespace character of the line. This expects the xmltok-* variables to be set up as by `xmltok-forward'." (let ((value-boundary (nxml-attribute-value-boundary pos)) (off 0)) (if value-boundary ;; inside an attribute value (let ((value-start (car value-boundary))) (goto-char pos) (forward-line -1) (if (< (point) value-start) (goto-char value-start) (back-to-indentation))) ;; outside an attribute value (goto-char pos) (while (and (= (forward-line -1) 0) (nxml-attribute-value-boundary (point)))) (cond ((<= (point) xmltok-start) (goto-char xmltok-start) (setq off nxml-attribute-indent) (let ((atts (xmltok-merge-attributes))) (when atts (let* ((att (car atts)) (start (xmltok-attribute-name-start att))) (when (< start pos) (goto-char start) (setq off 0)))))) (t (back-to-indentation)))) (+ (current-column) off))) (defun nxml-attribute-value-boundary (pos) "Return a pair (START . END) if POS is inside an attribute value. Otherwise return nil. START and END are the positions of the start and end of the attribute value containing POS. This expects the xmltok-* variables to be set up as by `xmltok-forward'." (let ((atts (xmltok-merge-attributes)) att value-start value-end value-boundary) (while atts (setq att (car atts)) (setq value-start (xmltok-attribute-value-start att)) (setq value-end (xmltok-attribute-value-end att)) (cond ((and value-start (< pos value-start)) (setq atts nil)) ((and value-start value-end (<= pos value-end)) (setq value-boundary (cons value-start value-end)) (setq atts nil)) (t (setq atts (cdr atts))))) value-boundary)) (defun nxml-compute-indent-in-delimited-token (pos open-delim close-delim) "Return the indent for a line that starts inside a token with delimiters. OPEN-DELIM and CLOSE-DELIM are strings giving the opening and closing delimiters. POS is the position of the first non-whitespace character of the line. This expects the xmltok-* variables to be set up as by `xmltok-forward'." (cond ((let ((end (+ pos (length close-delim)))) (and (<= end (point-max)) (string= (buffer-substring-no-properties pos end) close-delim))) (goto-char xmltok-start)) ((progn (goto-char pos) (forward-line -1) (while (looking-at "^[[:blank:]]*$") (forward-line -1)) (<= (point) xmltok-start)) (goto-char (+ xmltok-start (length open-delim))) (when (and (string= open-delim "