1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2026-01-03 02:31:03 -08:00

Make C++ digit separators work. Amend the handling of single quotes generally

Single quotes, even in strings and comments, are now marked with the
"punctuation" syntax-table property, except where they are validly bounding a
character literal.  They are font locked with font-lock-warning-face except
where they are valid.  This is done in C, C++, ObjC, and Java Modes.

* lisp/progmodes/cc-defs.el (c-clear-char-property-with-value-on-char-function)
(c-clear-char-property-with-value-on-char, c-put-char-properties-on-char): New
functions/macros.

* lisp/progmodes/cc-fonts.el (c-font-lock-invalid-single-quotes): New function.
(c-basic-matchers-before): invoke c-font-lock-invalid-single-quotes.

* lisp/progmodes/cc-langs.el (c-get-state-before-change-functions): Remove
c-before-after-change-digit-quote from wherever it occurs.  Insert
c-parse-quotes-before-change into the entries for the languages where it is
needed.
(c-before-font-lock-functions): Remove c-before-after-change-digit-quote from
wherever it occurs.  Insert c-parse-quotes-after-change into the entries for
the languages which need it.
(c-has-quoted-numbers): New lang-defconst/-defvar.

* lisp/progmodes/cc-mode.el (c-before-after-change-digit-quote): Remove.
(c-maybe-quoted-number-head, c-maybe-quoted-number-tail)
(c-maybe-quoted-number): New defconsts.
(c-quoted-number-head-before-point, c-quoted-number-tail-after-point)
(c-quoted-number-straddling-point, c-parse-quotes-before-change)
(c-parse-quotes-after-change): New functions.
This commit is contained in:
Alan Mackenzie 2017-07-01 15:43:07 +00:00
parent e620bbe38e
commit 59d07875df
4 changed files with 311 additions and 97 deletions

View file

@ -1171,6 +1171,63 @@ been put there by c-put-char-property. POINT remains unchanged."
nil ,from ,to ,value nil -property-)) nil ,from ,to ,value nil -property-))
;; GNU Emacs ;; GNU Emacs
`(c-clear-char-property-with-value-function ,from ,to ,property ,value))) `(c-clear-char-property-with-value-function ,from ,to ,property ,value)))
(defun c-clear-char-property-with-value-on-char-function (from to property
value char)
"Remove all text-properties PROPERTY with value VALUE on
characters with value CHAR from the region [FROM, TO), as tested
by `equal'. These properties are assumed to be over individual
characters, having been put there by c-put-char-property. POINT
remains unchanged."
(let ((place from)
)
(while ; loop round occurrences of (PROPERTY VALUE)
(progn
(while ; loop round changes in PROPERTY till we find VALUE
(and
(< place to)
(not (equal (get-text-property place property) value)))
(setq place (c-next-single-property-change place property nil to)))
(< place to))
(if (eq (char-after place) char)
(remove-text-properties place (1+ place) (cons property nil)))
;; Do we have to do anything with stickiness here?
(setq place (1+ place)))))
(defmacro c-clear-char-property-with-value-on-char (from to property value char)
"Remove all text-properties PROPERTY with value VALUE on
characters with value CHAR from the region [FROM, TO), as tested
by `equal'. These properties are assumed to be over individual
characters, having been put there by c-put-char-property. POINT
remains unchanged."
(if c-use-extents
;; XEmacs
`(let ((-property- ,property)
(-char- ,char))
(map-extents (lambda (ext val)
(if (and (equal (extent-property ext -property-) val)
(eq (char-after
(extent-start-position ext))
-char-))
(delete-extent ext)))
nil ,from ,to ,value nil -property-))
;; Gnu Emacs
`(c-clear-char-property-with-value-on-char-function ,from ,to ,property
,value ,char)))
(defmacro c-put-char-properties-on-char (from to property value char)
;; This needs to be a macro because `property' passed to
;; `c-put-char-property' must be a constant.
"Put the text property PROPERTY with value VALUE on characters
with value CHAR in the region [FROM to)."
`(let ((skip-string (concat "^" (list ,char)))
(-to- ,to))
(save-excursion
(goto-char ,from)
(while (progn (skip-chars-forward skip-string -to-)
(< (point) -to-))
(c-put-char-property (point) ,property ,value)
(forward-char)))))
;; Macros to put overlays (Emacs) or extents (XEmacs) on buffer text. ;; Macros to put overlays (Emacs) or extents (XEmacs) on buffer text.
;; For our purposes, these are characterized by being possible to ;; For our purposes, these are characterized by being possible to
@ -1228,6 +1285,8 @@ been put there by c-put-char-property. POINT remains unchanged."
(def-edebug-spec c-put-char-property t) (def-edebug-spec c-put-char-property t)
(def-edebug-spec c-get-char-property t) (def-edebug-spec c-get-char-property t)
(def-edebug-spec c-clear-char-property t) (def-edebug-spec c-clear-char-property t)
(def-edebug-spec c-clear-char-property-with-value-on-char t)
(def-edebug-spec c-put-char-properties-on-char t)
(def-edebug-spec c-clear-char-properties t) (def-edebug-spec c-clear-char-properties t)
(def-edebug-spec c-put-overlay t) (def-edebug-spec c-put-overlay t)
(def-edebug-spec c-delete-overlay t) (def-edebug-spec c-delete-overlay t)

View file

@ -702,6 +702,36 @@ stuff. Used on level 1 and higher."
t) t)
(c-put-font-lock-face start (1+ start) 'font-lock-warning-face))))) (c-put-font-lock-face start (1+ start) 'font-lock-warning-face)))))
(defun c-font-lock-invalid-single-quotes (limit)
;; This function will be called from font-lock for a region bounded by POINT
;; and LIMIT, as though it were to identify a keyword for
;; font-lock-keyword-face. It always returns NIL to inhibit this and
;; prevent a repeat invocation. See elisp/lispref page "Search-based
;; Fontification".
;;
;; This function fontifies invalid single quotes with
;; `font-lock-warning-face'. These are the single quotes which
;; o - aren't inside a literal;
;; o - are marked with a syntax-table text property value '(1); and
;; o - are NOT marked with a non-null c-digit-separator property.
(let ((limits (c-literal-limits))
state beg end)
(if limits
(goto-char (cdr limits))) ; Even for being in a ' '
(while (< (point) limit)
(setq beg (point))
(setq state (parse-partial-sexp (point) limit nil nil nil 'syntax-table))
(setq end (point))
(goto-char beg)
(while (progn (skip-chars-forward "^'" end)
(< (point) end))
(if (and (equal (c-get-char-property (point) 'syntax-table) '(1))
(not (c-get-char-property (point) 'c-digit-separator)))
(c-put-font-lock-face (point) (1+ (point)) font-lock-warning-face))
(forward-char))
(parse-partial-sexp end limit nil nil state 'syntax-table)))
nil)
(c-lang-defconst c-basic-matchers-before (c-lang-defconst c-basic-matchers-before
"Font lock matchers for basic keywords, labels, references and various "Font lock matchers for basic keywords, labels, references and various
other easily recognizable things that should be fontified before generic other easily recognizable things that should be fontified before generic
@ -723,6 +753,9 @@ casts and declarations are fontified. Used on level 2 and higher."
(concat ".\\(" c-string-limit-regexp "\\)") (concat ".\\(" c-string-limit-regexp "\\)")
'((c-font-lock-invalid-string))) '((c-font-lock-invalid-string)))
;; Invalid single quotes.
c-font-lock-invalid-single-quotes
;; Fontify C++ raw strings. ;; Fontify C++ raw strings.
,@(when (c-major-mode-is 'c++-mode) ,@(when (c-major-mode-is 'c++-mode)
'(c-font-lock-raw-strings)) '(c-font-lock-raw-strings))

View file

@ -474,18 +474,19 @@ so that all identifiers are recognized as words.")
;; The value here may be a list of functions or a single function. ;; The value here may be a list of functions or a single function.
t nil t nil
c++ '(c-extend-region-for-CPP c++ '(c-extend-region-for-CPP
; c-before-after-change-extend-region-for-lambda-capture ; doesn't seem needed.
c-before-change-check-raw-strings c-before-change-check-raw-strings
c-before-change-check-<>-operators c-before-change-check-<>-operators
c-depropertize-CPP c-depropertize-CPP
c-before-after-change-digit-quote
c-invalidate-macro-cache c-invalidate-macro-cache
c-truncate-bs-cache) c-truncate-bs-cache
c-parse-quotes-before-change)
(c objc) '(c-extend-region-for-CPP (c objc) '(c-extend-region-for-CPP
c-depropertize-CPP c-depropertize-CPP
c-invalidate-macro-cache c-invalidate-macro-cache
c-truncate-bs-cache) c-truncate-bs-cache
;; java 'c-before-change-check-<>-operators c-parse-quotes-before-change)
java 'c-parse-quotes-before-change
;; 'c-before-change-check-<>-operators
awk 'c-awk-record-region-clear-NL) awk 'c-awk-record-region-clear-NL)
(c-lang-defvar c-get-state-before-change-functions (c-lang-defvar c-get-state-before-change-functions
(let ((fs (c-lang-const c-get-state-before-change-functions))) (let ((fs (c-lang-const c-get-state-before-change-functions)))
@ -515,18 +516,19 @@ parameters \(point-min) and \(point-max).")
t '(c-depropertize-new-text t '(c-depropertize-new-text
c-change-expand-fl-region) c-change-expand-fl-region)
(c objc) '(c-depropertize-new-text (c objc) '(c-depropertize-new-text
c-parse-quotes-after-change
c-extend-font-lock-region-for-macros c-extend-font-lock-region-for-macros
c-neutralize-syntax-in-and-mark-CPP c-neutralize-syntax-in-and-mark-CPP
c-change-expand-fl-region) c-change-expand-fl-region)
c++ '(c-depropertize-new-text c++ '(c-depropertize-new-text
c-parse-quotes-after-change
c-extend-font-lock-region-for-macros c-extend-font-lock-region-for-macros
; c-before-after-change-extend-region-for-lambda-capture ; doesn't seem needed.
c-before-after-change-digit-quote
c-after-change-re-mark-raw-strings c-after-change-re-mark-raw-strings
c-neutralize-syntax-in-and-mark-CPP c-neutralize-syntax-in-and-mark-CPP
c-restore-<>-properties c-restore-<>-properties
c-change-expand-fl-region) c-change-expand-fl-region)
java '(c-depropertize-new-text java '(c-depropertize-new-text
c-parse-quotes-after-change
c-restore-<>-properties c-restore-<>-properties
c-change-expand-fl-region) c-change-expand-fl-region)
awk '(c-depropertize-new-text awk '(c-depropertize-new-text
@ -609,6 +611,12 @@ EOL terminated statements."
(c c++ objc) t) (c c++ objc) t)
(c-lang-defvar c-has-bitfields (c-lang-const c-has-bitfields)) (c-lang-defvar c-has-bitfields (c-lang-const c-has-bitfields))
(c-lang-defconst c-has-quoted-numbers
"Whether the language has numbers quoted like 4'294'967'295."
t nil
c++ t)
(c-lang-defvar c-has-quoted-numbers (c-lang-const c-has-quoted-numbers))
(c-lang-defconst c-modified-constant (c-lang-defconst c-modified-constant
"Regexp that matches a “modified” constant literal such as \"L\\='a\\='\", "Regexp that matches a “modified” constant literal such as \"L\\='a\\='\",
a long character. In particular, this recognizes forms of constant a long character. In particular, this recognizes forms of constant

View file

@ -1083,101 +1083,215 @@ Note that the style variables are always made local to the buffer."
(forward-line)) ; no infinite loop with, e.g., "#//" (forward-line)) ; no infinite loop with, e.g., "#//"
))))) )))))
(defun c-before-after-change-digit-quote (beg end &optional old-len) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; This function either removes or applies the punctuation value ('(1)) of ;; Parsing of quotes.
;; the `syntax-table' text property on single quote marks which are ;;
;; separator characters in long integer literals, e.g. "4'294'967'295". It ;; Valid digit separators in numbers will get the syntax-table "punctuation"
;; applies to both decimal/octal and hex literals. (FIXME (2016-06-10): it ;; property, '(1), and also the text property `c-digit-separator' value t.
;; should also apply to binary literals.) ;;
;; Invalid other quotes (i.e. those not validly bounding a single character,
;; or escaped character) will get the syntax-table "punctuation" property,
;; '(1), too.
;;
;; Note that, for convenience, these properties are applied even inside
;; comments and strings.
(defconst c-maybe-quoted-number-head
(concat
"\\(0\\("
"\\([Xx]\\([0-9a-fA-F]\\('[0-9a-fA-F]\\|[0-9a-fA-F]\\)*'?\\)?\\)"
"\\|"
"\\([Bb]\\([01]\\('[01]\\|[01]\\)*'?\\)?\\)"
"\\|"
"\\('[0-7]\\|[0-7]\\)*'?"
"\\)"
"\\|"
"[1-9]\\('[0-9]\\|[0-9]\\)*'?"
"\\)")
"Regexp matching the head of a numeric literal, including with digit separators.")
(defun c-quoted-number-head-before-point ()
;; Return non-nil when the head of a possibly quoted number is found
;; immediately before point. The value returned in this case is the buffer
;; position of the start of the head.
(when c-has-quoted-numbers
(save-excursion
(let ((here (point))
)
(skip-chars-backward "0-9a-fA-F'")
(if (and (memq (char-before) '(?x ?X))
(eq (char-before (1- (point))) ?0))
(backward-char 2))
(while (and (search-forward-regexp c-maybe-quoted-number-head here t)
(< (match-end 0) here)))
(and (eq (match-end 0) here) (match-beginning 0))))))
(defconst c-maybe-quoted-number-tail
(concat
"\\("
"\\([xX']?[0-9a-fA-F]\\('[0-9a-fA-F]\\|[0-9a-fA-F]\\)*\\)"
"\\|"
"\\([bB']?[01]\\('[01]\\|[01]\\)*\\)"
"\\|"
"\\('?[0-9]\\('[0-9]\\|[0-9]\\)*\\)"
"\\)")
"Regexp matching the tail of a numeric literal, including with digit separators.
Note that this is a strict tail, so won't match, e.g. \"0x....\".")
(defun c-quoted-number-tail-after-point ()
;; Return non-nil when a proper tail of a possibly quoted number is found
;; immediately after point. The value returned in this case is the buffer
;; position of the end of the tail.
(when c-has-quoted-numbers
(and (looking-at c-maybe-quoted-number-tail)
(match-end 0))))
(defconst c-maybe-quoted-number
(concat
"\\(0\\("
"\\([Xx][0-9a-fA-F]\\('[0-9a-fA-F]\\|[0-9a-fA-F]\\)*\\)"
"\\|"
"\\([Bb][01]\\('[01]\\|[01]\\)*\\)"
"\\|"
"\\('[0-7]\\|[0-7]\\)*"
"\\)"
"\\|"
"[1-9]\\('[0-9]\\|[0-9]\\)*"
"\\)")
"Regexp matching a numeric literal, including with digit separators.")
(defun c-quoted-number-straddling-point ()
;; Return non-nil if a definitely quoted number starts before point and ends
;; after point. In this case the number is bounded by (match-beginning 0)
;; and (match-end 0).
(when c-has-quoted-numbers
(save-excursion
(let ((here (point))
(bound (progn (skip-chars-forward "0-9a-fA-F'") (point))))
(goto-char here)
(when (< (skip-chars-backward "0-9a-fA-F'") 0)
(if (and (memq (char-before) '(?x ?X))
(eq (char-before (1- (point))) ?0))
(backward-char 2))
(while (and (search-forward-regexp c-maybe-quoted-number bound t)
(<= (match-end 0) here)))
(and (< (match-beginning 0) here)
(> (match-end 0) here)
(save-match-data
(goto-char (match-beginning 0))
(save-excursion (search-forward "'" (match-end 0) t)))))))))
(defun c-parse-quotes-before-change (beg end)
;; This function analyzes 's near the region (c-new-BEG c-new-END), amending
;; those two variables as needed to include 's into that region when they
;; might be syntactically relevant to the change in progress.
;; ;;
;; In both uses of the function, the `syntax-table' properties are ;; Having amended that region, the function removes pertinent text
;; removed/applied only on quote marks which appear to be digit separators. ;; properties (syntax-table properties with value '(1) and c-digit-separator
;; props with value t) from 's in it. This operation is performed even
;; within strings and comments.
;; ;;
;; Point is undefined on both entry and exit to this function, and the ;; This function is called exclusively as a before-change function via the
;; return value has no significance. The function is called solely as a ;; variable `c-get-state-before-change-functions'.
;; before-change function (see `c-get-state-before-change-functions') and as (c-save-buffer-state (p-limit limits found)
;; an after change function (see `c-before-font-lock-functions', with the ;; Special consideraton for deleting \ from '\''.
;; parameters BEG, END, and (optionally) OLD-LEN being given the standard (if (and (> end beg)
;; values for before/after-change functions. (eq (char-before end) ?\\)
(c-save-buffer-state ((num-begin c-new-BEG) digit-re try-end) (<= c-new-END end))
(goto-char c-new-END) (setq c-new-END (min (1+ end) (point-max))))
(when (looking-at "\\(x\\)?[0-9a-fA-F']+")
(setq c-new-END (match-end 0))) ;; Do we have a ' (or something like ',',',',',') within range of
;; c-new-BEG?
(goto-char c-new-BEG) (goto-char c-new-BEG)
(when (looking-at "\\(x?\\)[0-9a-fA-F']") (setq p-limit (max (- (point) 2) (point-min)))
(if (re-search-backward "\\(0x\\)?[0-9a-fA-F]*\\=" nil t) (while (and (skip-chars-backward "^\\\\'" p-limit)
(setq c-new-BEG (point)))) (> (point) p-limit))
(when (eq (char-before) ?\\)
(setq p-limit (max (1- p-limit) (point-min))))
(backward-char)
(setq c-new-BEG (point)))
(beginning-of-line)
(while (and
(setq found (search-forward-regexp "\\('\\([^'\\]\\|\\\\.\\)\\)*'"
c-new-BEG 'limit))
(< (point) (1- c-new-BEG))))
(if found
(setq c-new-BEG
(if (and (eq (point) (1- c-new-BEG))
(eq (char-after) ?')) ; "''" before c-new-BEG.
(1- c-new-BEG)
(match-beginning 0))))
(while ;; Check for a number with quote separators straddling c-new-BEG
(re-search-forward "[0-9a-fA-F]'[0-9a-fA-F]" c-new-END t) (when c-has-quoted-numbers
(setq try-end (1- (point))) (goto-char c-new-BEG)
(re-search-backward "[^0-9a-fA-F']" num-begin t) (when ;; (c-quoted-number-straddling-point)
(setq digit-re (c-quoted-number-head-before-point)
(cond (setq c-new-BEG (match-beginning 0))))
((and (not (bobp)) (eq (char-before) ?0) (memq (char-after) '(?x ?X)))
"[0-9a-fA-F]") ;; Do we have a ' (or something like ',',',',...,',') within range of
((and (eq (char-after (1+ (point))) ?0) ;; c-new-END?
(memq (char-after (+ 2 (point))) '(?b ?B))) (goto-char c-new-END)
"[01]") (setq p-limit (min (+ (point) 2) (point-max)))
((memq (char-after (1+ (point))) '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)) (while (and (skip-chars-forward "^\\\\'" p-limit)
"[0-9]") (< (point) p-limit))
(t nil))) (when (eq (char-after) ?\\)
(when digit-re (setq p-limit (min (1+ p-limit) (point-max))))
(cond ((eq (char-after) ?x) (forward-char)) (forward-char)
((looking-at ".?0[Bb]") (goto-char (match-end 0))) (setq c-new-END (point)))
((looking-at digit-re)) (if (looking-at "[^']?\\('\\([^'\\]\\|\\\\.\\)\\)*'")
(t (forward-char))) (setq c-new-END (match-end 0)))
(when (not (c-in-literal))
(let ((num-end ; End of valid sequence of digits/quotes. ;; Check for a number with quote separators straddling c-new-END.
(save-excursion (when c-has-quoted-numbers
(re-search-forward (goto-char c-new-END)
(concat "\\=\\(" digit-re "+'\\)*" digit-re "+") nil t) (when ;; (c-quoted-number-straddling-point)
(point)))) (c-quoted-number-tail-after-point)
(setq try-end ; End of sequence of digits/quotes (setq c-new-END (match-end 0))))
;; Remove the '(1) syntax-table property from all "'"s within (c-new-BEG
;; c-new-END).
(c-clear-char-property-with-value-on-char
c-new-BEG c-new-END
'syntax-table '(1)
?')
;; Remove the c-digit-separator text property from the same "'"s.
(when c-has-quoted-numbers
(c-clear-char-property-with-value-on-char
c-new-BEG c-new-END
'c-digit-separator t
?'))))
(defun c-parse-quotes-after-change (beg end old-len)
;; This function applies syntax-table properties (value '(1)) and
;; c-digit-separator properties as needed to 's within the range (c-new-BEG
;; c-new-END). This operation is performed even within strings and
;; comments.
;;
;; This function is called exclusively as an after-change function via the
;; variable `c-before-font-lock-functions'.
(c-save-buffer-state (p-limit limits num-beg num-end clear-from-BEG-to)
;; Apply the needed syntax-table and c-digit-separator text properties to
;; quotes.
(goto-char c-new-BEG)
(while (and (< (point) c-new-END)
(search-forward "'" c-new-END 'limit))
(cond ((and (eq (char-before (1- (point))) ?\\)
;; Check we've got an odd number of \s, here.
(save-excursion (save-excursion
(re-search-forward (backward-char)
(concat "\\=\\(" digit-re "\\|'\\)+") nil t) (eq (logand (skip-chars-backward "\\\\") 1) 1)))) ; not a real '.
(point))) ((c-quoted-number-straddling-point)
(while (re-search-forward (setq num-beg (match-beginning 0)
(concat digit-re "\\('\\)" digit-re) num-end t) num-end (match-end 0))
(if old-len ; i.e. are we in an after-change function? (c-put-char-properties-on-char num-beg num-end
(c-put-char-property (match-beginning 1) 'syntax-table '(1)) 'syntax-table '(1) ?')
(c-clear-char-property (match-beginning 1) 'syntax-table)) (c-put-char-properties-on-char num-beg num-end
(backward-char))))) 'c-digit-separator t ?')
(goto-char try-end) (goto-char num-end))
(setq num-begin (point))))) ((looking-at "\\([^\\']\\|\\\\.\\)'") ; balanced quoted expression.
(goto-char (match-end 0)))
;; The following doesn't seem needed at the moment (2016-08-15). (t (c-put-char-property (1- (point)) 'syntax-table '(1)))))))
;; (defun c-before-after-change-extend-region-for-lambda-capture
;; (_beg _end &optional _old-len)
;; ;; In C++ Mode, extend the region (c-new-BEG c-new-END) to cover any lambda
;; ;; function capture lists we happen to be inside. This function is expected
;; ;; to be called both as a before-change and after change function.
;; ;;
;; ;; Note that these things _might_ be nested, with a capture list looking
;; ;; like:
;; ;;
;; ;; [ ...., &foo = [..](){...}(..), ... ]
;; ;;
;; ;; . What a wonderful language is C++. ;-)
;; (c-save-buffer-state (paren-state pos)
;; (goto-char c-new-BEG)
;; (setq paren-state (c-parse-state))
;; (while (setq pos (c-pull-open-brace paren-state))
;; (goto-char pos)
;; (when (c-looking-at-c++-lambda-capture-list)
;; (setq c-new-BEG (min c-new-BEG pos))
;; (if (c-go-list-forward)
;; (setq c-new-END (max c-new-END (point))))))
;; (goto-char c-new-END)
;; (setq paren-state (c-parse-state))
;; (while (setq pos (c-pull-open-brace paren-state))
;; (goto-char pos)
;; (when (c-looking-at-c++-lambda-capture-list)
;; (setq c-new-BEG (min c-new-BEG pos))
;; (if (c-go-list-forward)
;; (setq c-new-END (max c-new-END (point))))))))
(defun c-before-change (beg end) (defun c-before-change (beg end)
;; Function to be put on `before-change-functions'. Primarily, this calls ;; Function to be put on `before-change-functions'. Primarily, this calls