mirror of
git://git.sv.gnu.org/emacs.git
synced 2026-01-30 12:21:25 -08:00
Fix yank-media utf-16 detection
* lisp/yank-media.el (yank-media--utf-16-p): Factor out into its own function for easier testing and fix the code.
This commit is contained in:
parent
89d419255b
commit
224aeb2877
2 changed files with 58 additions and 27 deletions
|
|
@ -155,33 +155,7 @@ non-supported selection data types."
|
|||
(format "%s" data))
|
||||
((string-match-p "\\`text/" (symbol-name data-type))
|
||||
;; We may have utf-16, which Emacs won't detect automatically.
|
||||
(let ((coding-system
|
||||
(and (zerop (mod (length data) 2))
|
||||
(let ((stats (vector 0 0)))
|
||||
(dotimes (i (length data))
|
||||
(when (zerop (elt data i))
|
||||
(setf (aref stats (mod i 2))
|
||||
(1+ (aref stats (mod i 2))))))
|
||||
;; We have some nuls...
|
||||
(and (not (and (zerop (elt stats 0))
|
||||
(zerop (elt stats 1))))
|
||||
;; If we have more than 90% every-other nul, then it's
|
||||
;; pretty likely to be utf-16.
|
||||
(cond
|
||||
((> (if (zerop (elt stats 1))
|
||||
1
|
||||
(/ (float (elt stats 0))
|
||||
(float (elt stats 1))))
|
||||
0.9)
|
||||
;; Big endian.
|
||||
'utf-16-be)
|
||||
((> (if (zerop (elt stats 0))
|
||||
1
|
||||
(/ (float (elt stats 1))
|
||||
(float (elt stats 0))))
|
||||
0.9)
|
||||
;; Little endian.
|
||||
'utf-16-le)))))))
|
||||
(let ((coding-system (yank-media--utf-16-p data)))
|
||||
(if coding-system
|
||||
(decode-coding-string data coding-system)
|
||||
;; Some programs add a nul character at the end of text/*
|
||||
|
|
@ -192,6 +166,25 @@ non-supported selection data types."
|
|||
(t
|
||||
data)))
|
||||
|
||||
(defun yank-media--utf-16-p (data)
  "Return the UTF-16 coding system DATA appears to use, or nil.
DATA is a sequence of character codes, normally a string of
selection data.  The value is `utf-16-be' if more than 90% of the
even-indexed elements of DATA are NUL, `utf-16-le' if more than 90%
of the odd-indexed elements are NUL, and nil otherwise.  If both
conditions hold, `utf-16-be' wins.  Empty DATA and DATA of odd
length are never considered to be UTF-16."
  (let* ((len (length data))
         (pairs (/ len 2)))
    ;; The heuristic works on two-element units, so an odd length rules
    ;; UTF-16 out.  Empty DATA is rejected explicitly so that the
    ;; ratios below never divide by zero.
    (when (and (> len 0) (zerop (% len 2)))
      ;; Slot 0 counts the NULs at even indices, slot 1 those at odd
      ;; indices.
      (let ((nuls (vector 0 0)))
        (dotimes (i len)
          (when (zerop (elt data i))
            (let ((slot (% i 2)))
              (aset nuls slot (1+ (aref nuls slot))))))
        ;; If we have more than 90% every-other nul, then it's
        ;; pretty likely to be utf-16.
        (cond
         ((> (/ (float (aref nuls 0)) pairs) 0.9)
          ;; NULs lead each pair: big endian.
          'utf-16-be)
         ((> (/ (float (aref nuls 1)) pairs) 0.9)
          ;; NULs trail each pair: little endian.
          'utf-16-le))))))
|
||||
|
||||
(provide 'yank-media)
|
||||
|
||||
;;; yank-media.el ends here
|
||||
|
|
|
|||
38
test/lisp/yank-media-tests.el
Normal file
38
test/lisp/yank-media-tests.el
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
;;; yank-media-tests.el --- Tests for yank-media.el -*- lexical-binding: t; -*-
|
||||
|
||||
;; Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
|
||||
;; This file is part of GNU Emacs.
|
||||
|
||||
;; GNU Emacs is free software: you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation, either version 3 of the License, or
|
||||
;; (at your option) any later version.
|
||||
|
||||
;; GNU Emacs is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
;;; Commentary:
|
||||
|
||||
;;
|
||||
|
||||
;;; Code:
|
||||
|
||||
(require 'yank-media)
|
||||
(require 'ert)
|
||||
(require 'ert-x)
|
||||
|
||||
(ert-deftest test-utf-16 ()
  "Check the coding system guessed by `yank-media--utf-16-p'."
  ;; Empty data, odd-length data and NUL-free data are not UTF-16.
  (should-not (yank-media--utf-16-p ""))
  (should-not (yank-media--utf-16-p "f"))
  (should-not (yank-media--utf-16-p "fo"))
  ;; Only half of the even positions are NUL: below the threshold.
  (should-not (yank-media--utf-16-p "\000ofo"))
  ;; NUL in every even position means big endian, in every odd
  ;; position little endian.
  (should (eq (yank-media--utf-16-p "\000o\000o") 'utf-16-be))
  (should (eq (yank-media--utf-16-p "o\000o\000") 'utf-16-le))
  ;; Non-ASCII Latin-1 code points still have a NUL second byte.
  (should (eq (yank-media--utf-16-p "h\000\351\000") 'utf-16-le))
  ;; NULs split evenly between both positions: no clear endianness.
  (should-not (yank-media--utf-16-p "o\000\000o")))
|
||||
|
||||
;;; yank-media-tests.el ends here
|
||||
Loading…
Add table
Add a link
Reference in a new issue