mirror of
git://git.sv.gnu.org/emacs.git
synced 2026-01-23 21:13:52 -08:00
(ctext-no-compositions, compound-text-with-extensions): New coding systems.
(x-ctext-with-extensions, ctext-with-extensions): Aliases for compound-text-with-extensions. (non-standard-icccm-encodings-alist, non-standard-designations-alist): New variables. (ctext-post-read-conversion, ctext-pre-write-conversion): New functions.
This commit is contained in:
parent
102ddfc133
commit
ec2b93f7b5
1 changed files with 182 additions and 0 deletions
|
|
@ -405,6 +405,188 @@ is treated as a character."
|
|||
(define-coding-system-alias 'x-ctext 'compound-text)
|
||||
(define-coding-system-alias 'ctext 'compound-text)
|
||||
|
||||
;; Same as compound-text, but doesn't produce composition escape
|
||||
;; sequences. Used in post-read and pre-write conversions of
|
||||
;; ctext-with-extensions, below.
|
||||
(make-coding-system
|
||||
'ctext-no-compositions 2 ?x
|
||||
"Compound text based generic encoding for decoding unknown messages.
|
||||
|
||||
Like `compound-text', but does not produce escape sequences for compositions."
|
||||
'((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t
|
||||
nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil
|
||||
init-bol nil nil)
|
||||
'((safe-charsets . t)
|
||||
(mime-charset . x-ctext)))
|
||||
|
||||
(defvar non-standard-icccm-encodings-alist
|
||||
'(("ISO8859-15" . latin-iso8859-15)
|
||||
("ISO8859-14" . latin-iso8859-14)
|
||||
("KOI8-R" . koi8-r)
|
||||
("BIG5-0" . big5))
|
||||
"Alist of font charset names defined by XLFD, and the corresponding Emacs
|
||||
charsets or coding systems.")
|
||||
|
||||
;; Functions to support "Non-Standard Character Set Encodings" defined
|
||||
;; by the ICCCM spec. We support that by converting the leading
|
||||
;; sequence of the ``extended segment'' to the corresponding ISO-2022
|
||||
;; sequences (if the leading sequence names an Emacs charset), or decode
|
||||
;; the segment (if it names a coding system). Encoding does the reverse.
|
||||
(defun ctext-post-read-conversion (len)
|
||||
"Decode LEN characters encoded as Compound Text with Extended Segments."
|
||||
(buffer-disable-undo) ; minimize consing due to insertions and deletions
|
||||
(narrow-to-region (point) (+ (point) len))
|
||||
(save-match-data
|
||||
(let ((pt (point-marker))
|
||||
(oldpt (point-marker))
|
||||
(newpt (make-marker))
|
||||
(modified-p (buffer-modified-p))
|
||||
(case-fold-search nil)
|
||||
last-coding-system-used
|
||||
encoding textlen chset)
|
||||
(while (re-search-forward
|
||||
"\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002"
|
||||
nil 'move)
|
||||
(set-marker newpt (point))
|
||||
(set-marker pt (match-beginning 0))
|
||||
(setq encoding (match-string 3))
|
||||
(setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
|
||||
(- (aref (match-string 2) 1) 128))
|
||||
(1+ (length encoding))))
|
||||
(setq
|
||||
chset (cdr (assoc-ignore-case encoding
|
||||
non-standard-icccm-encodings-alist)))
|
||||
(cond ((null chset)
|
||||
;; This charset is not supported--leave this extended
|
||||
;; segment unaltered and skip over it.
|
||||
(goto-char (+ (point) textlen)))
|
||||
((charsetp chset)
|
||||
;; If it's a charset, replace the leading escape sequence
|
||||
;; with a standard ISO-2022 sequence. We will decode all
|
||||
;; such segments later, in one go, when we exit the loop
|
||||
;; or find an extended segment that names a coding
|
||||
;; system, not a charset.
|
||||
(replace-match
|
||||
(concat "\\1"
|
||||
(if (= 0 (charset-iso-graphic-plane chset))
|
||||
;; GL charsets
|
||||
(if (= 1 (charset-dimension chset)) "(" "$(")
|
||||
;; GR charsets
|
||||
(if (= 96 (charset-chars chset))
|
||||
"-"
|
||||
(if (= 1 (charset-dimension chset)) ")" "$)")))
|
||||
(string (charset-iso-final-char chset)))
|
||||
t)
|
||||
(goto-char (+ (point) textlen)))
|
||||
((coding-system-p chset)
|
||||
;; If it's a coding system, we need to decode the segment
|
||||
;; right away. But first, decode what we've skipped
|
||||
;; across until now.
|
||||
(when (> pt oldpt)
|
||||
(decode-coding-region oldpt pt 'ctext-no-compositions))
|
||||
(delete-region pt newpt)
|
||||
(set-marker newpt (+ newpt textlen))
|
||||
(decode-coding-region pt newpt chset)
|
||||
(goto-char newpt)
|
||||
(set-marker oldpt newpt))))
|
||||
;; Decode what's left.
|
||||
(when (> (point) oldpt)
|
||||
(decode-coding-region oldpt (point) 'ctext-no-compositions))
|
||||
;; This buffer started as unibyte, because the string we get from
|
||||
;; the X selection is a unibyte string. We must now make it
|
||||
;; multibyte, so that the decoded text is inserted as multibyte
|
||||
;; into its buffer.
|
||||
(set-buffer-multibyte t)
|
||||
(set-buffer-modified-p modified-p)
|
||||
(- (point-max) (point-min)))))
|
||||
|
||||
(defvar non-standard-designations-alist
|
||||
'(("$(0" . (big5 "big5-0" 2))
|
||||
("$(1" . (big5 "big5-0" 2))
|
||||
("-V" . (t "iso8859-10" 1))
|
||||
("-Y" . (t "iso8859-13" 1))
|
||||
("-_" . (t "iso8859-14" 1))
|
||||
("-b" . (t "iso8859-15" 1))
|
||||
("-f" . (t "iso8859-16" 1)))
|
||||
"Alist of ctext control sequences that introduce character sets which
|
||||
are not in the list of approved ICCCM encodings, and the corresponding
|
||||
coding system, identifier string, and number of octets per encoded
|
||||
character.
|
||||
|
||||
Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
|
||||
is the control sequence (sans the leading ESC) that introduces the character
|
||||
set in the text encoded by compound-text. ENCODING is a coding system
|
||||
symbol; if it is t, it means that the ctext coding system already encodes
|
||||
the text correctly, and only the leading control sequence needs to be altered.
|
||||
If ENCODING is a coding system, we need to re-encode the text with that
|
||||
coding system. CHARSET is the ICCCM name of the charset we need to put into
|
||||
the leading control sequence. NOCTETS is the number of octets (bytes) that
|
||||
encode each character in this charset. NOCTETS can be 0 (meaning the number
|
||||
of octets per character is variable), 1, 2, 3, or 4.")
|
||||
|
||||
(defun ctext-pre-write-conversion (from to)
|
||||
"Encode characters between FROM and TO as Compound Text w/Extended Segments."
|
||||
(buffer-disable-undo) ; minimize consing due to insertions and deletions
|
||||
(narrow-to-region from to)
|
||||
(encode-coding-region from to 'ctext-no-compositions)
|
||||
;; Replace ISO-2022 charset designations with extended segments, for
|
||||
;; those charsets that are not part of the official X registry.
|
||||
(save-match-data
|
||||
(goto-char (point-min))
|
||||
(let ((newpt (make-marker))
|
||||
(case-fold-search nil)
|
||||
pt desig encode-info encoding chset noctets textlen)
|
||||
(set-buffer-multibyte nil)
|
||||
(while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move)
|
||||
(setq desig (match-string 1)
|
||||
pt (point-marker)
|
||||
encode-info (cdr (assoc desig non-standard-designations-alist))
|
||||
encoding (car encode-info)
|
||||
chset (cadr encode-info)
|
||||
noctets (car (cddr encode-info)))
|
||||
(skip-chars-forward "^\e")
|
||||
(set-marker newpt (point))
|
||||
(cond
|
||||
((eq encoding t) ; only the leading sequence needs to be changed
|
||||
(setq textlen (+ (- newpt pt) (length chset) 1))
|
||||
(replace-match (format "\e%%/%d%c%c%s"
|
||||
noctets
|
||||
(+ (/ textlen 128) 128)
|
||||
(+ (% textlen 128) 128)
|
||||
chset)
|
||||
t t))
|
||||
((coding-system-p encoding) ; need to recode the entire segment...
|
||||
(set-marker pt (match-beginning 0))
|
||||
(decode-coding-region pt newpt 'ctext-no-compositions)
|
||||
(set-buffer-multibyte t)
|
||||
(encode-coding-region pt newpt encoding)
|
||||
(set-buffer-multibyte nil)
|
||||
(setq textlen (+ (- newpt pt) (length chset) 1))
|
||||
(goto-char pt)
|
||||
(insert (format "\e%%/%d%c%c%s"
|
||||
noctets
|
||||
(+ (/ textlen 128) 128)
|
||||
(+ (% textlen 128) 128)
|
||||
chset))))
|
||||
(goto-char newpt))))
|
||||
(set-buffer-multibyte t)
|
||||
nil)
|
||||
|
||||
(make-coding-system
|
||||
'compound-text-with-extensions 5 ?x
|
||||
"Compound text encoding with ICCCM Extended Segment extensions.
|
||||
|
||||
This coding system should be used only for X selections. It is inappropriate
|
||||
for decoding and encoding files, process I/O, etc."
|
||||
nil
|
||||
'((post-read-conversion . ctext-post-read-conversion)
|
||||
(pre-write-conversion . ctext-pre-write-conversion)))
|
||||
|
||||
(define-coding-system-alias
|
||||
'x-ctext-with-extensions 'compound-text-with-extensions)
|
||||
(define-coding-system-alias
|
||||
'ctext-with-extensions 'compound-text-with-extensions)
|
||||
|
||||
(make-coding-system
|
||||
'iso-safe 2 ?-
|
||||
"Convert all characters but ASCII to `?'."
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue