mirror of
git://git.sv.gnu.org/emacs.git
synced 2026-01-07 12:20:39 -08:00
Fix ASCII-only conversion logic (bug#40407)
To sidestep conversion altogether when EOL conversion applies, we must
either be encoding a string without NL, or decoding without CR.

* src/coding.c (string_ascii_p): Revert to a pure predicate.
(code_convert_string): Fix logic.  Don't use uninitialised
ascii_p (removed).  Use memchr to detect CR or LF in string when
needed.
* test/src/coding-tests.el (coding-nocopy-ascii): Update tests to
include encodings with explicit EOL conversions.
This commit is contained in:
parent
95dd8de1df
commit
d3e2c88041
2 changed files with 38 additions and 41 deletions
46
src/coding.c
46
src/coding.c
|
|
@ -9474,22 +9474,15 @@ not fully specified.) */)
|
|||
return code_convert_region (start, end, coding_system, destination, 1, 0);
|
||||
}
|
||||
|
||||
/* Non-zero if STR contains only characters in the 0..127 range.
|
||||
Positive if STR includes characters that don't need EOL conversion
|
||||
on decoding, negative otherwise. */
|
||||
static int
|
||||
string_ascii_p (Lisp_Object str)
|
||||
/* Whether STRING only contains chars in the 0..127 range. */
|
||||
static bool
|
||||
string_ascii_p (Lisp_Object string)
|
||||
{
|
||||
ptrdiff_t nbytes = SBYTES (str);
|
||||
bool CR_Seen = false;
|
||||
ptrdiff_t nbytes = SBYTES (string);
|
||||
for (ptrdiff_t i = 0; i < nbytes; i++)
|
||||
{
|
||||
if (SREF (str, i) > 127)
|
||||
return 0;
|
||||
if (SREF (str, i) == '\r')
|
||||
CR_Seen = true;
|
||||
}
|
||||
return CR_Seen ? -1 : 1;
|
||||
if (SREF (string, i) > 127)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
Lisp_Object
|
||||
|
|
@ -9526,24 +9519,19 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
|
|||
if (EQ (dst_object, Qt))
|
||||
{
|
||||
/* Fast path for ASCII-only input and an ASCII-compatible coding:
|
||||
act as identity if no EOL conversion is neede. */
|
||||
int ascii_p;
|
||||
act as identity if no EOL conversion is needed. */
|
||||
Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
|
||||
if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
|
||||
&& (STRING_MULTIBYTE (string)
|
||||
? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0)))
|
||||
{
|
||||
if (ascii_p > 0
|
||||
|| (ascii_p < 0
|
||||
&& (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
|
||||
|| inhibit_eol_conversion)))
|
||||
return (nocopy
|
||||
? string
|
||||
: (encodep
|
||||
? make_unibyte_string (SSDATA (string), bytes)
|
||||
: make_multibyte_string (SSDATA (string),
|
||||
bytes, bytes)));
|
||||
}
|
||||
? (chars == bytes) : string_ascii_p (string))
|
||||
&& (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
|
||||
|| inhibit_eol_conversion
|
||||
|| ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes)))
|
||||
return (nocopy
|
||||
? string
|
||||
: (encodep
|
||||
? make_unibyte_string (SSDATA (string), bytes)
|
||||
: make_multibyte_string (SSDATA (string), bytes, bytes)));
|
||||
}
|
||||
else if (BUFFERP (dst_object))
|
||||
{
|
||||
|
|
|
|||
|
|
@ -388,29 +388,38 @@
|
|||
(let* ((uni (apply #'string (number-sequence 0 127)))
|
||||
(multi (string-to-multibyte uni)))
|
||||
(dolist (s (list uni multi))
|
||||
;; Encodings without EOL conversion.
|
||||
(dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix))
|
||||
(should-not (eq (decode-coding-string s coding nil) s))
|
||||
(should-not (eq (encode-coding-string s coding nil) s))
|
||||
(should (eq (decode-coding-string s coding t) s))
|
||||
(should (eq (encode-coding-string s coding t) s)))))
|
||||
(let* ((uni (apply #'string (number-sequence 15 127)))
|
||||
(should (eq (encode-coding-string s coding t) s)))
|
||||
|
||||
;; With EOL conversion inhibited.
|
||||
(let ((inhibit-eol-conversion t))
|
||||
(dolist (coding '(us-ascii iso-latin-1 utf-8))
|
||||
(should-not (eq (decode-coding-string s coding nil) s))
|
||||
(should-not (eq (encode-coding-string s coding nil) s))
|
||||
(should (eq (decode-coding-string s coding t) s))
|
||||
(should (eq (encode-coding-string s coding t) s))))))
|
||||
|
||||
;; Check identity decoding with EOL conversion for ASCII except CR.
|
||||
(let* ((uni (apply #'string (delq ?\r (number-sequence 0 127))))
|
||||
(multi (string-to-multibyte uni)))
|
||||
(dolist (s (list uni multi))
|
||||
(dolist (coding '(us-ascii iso-latin-1 utf-8))
|
||||
(dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac))
|
||||
(should-not (eq (decode-coding-string s coding nil) s))
|
||||
(should-not (eq (encode-coding-string s coding nil) s))
|
||||
(should (eq (decode-coding-string s coding t) s))
|
||||
(should (eq (encode-coding-string s coding t) s)))))
|
||||
(let* ((uni (apply #'string (number-sequence 0 127)))
|
||||
(multi (string-to-multibyte uni))
|
||||
(inhibit-eol-conversion t))
|
||||
(should (eq (decode-coding-string s coding t) s)))))
|
||||
|
||||
;; Check identity encoding with EOL conversion for ASCII except LF.
|
||||
(let* ((uni (apply #'string (delq ?\n (number-sequence 0 127))))
|
||||
(multi (string-to-multibyte uni)))
|
||||
(dolist (s (list uni multi))
|
||||
(dolist (coding '(us-ascii iso-latin-1 utf-8))
|
||||
(should-not (eq (decode-coding-string s coding nil) s))
|
||||
(dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac))
|
||||
(should-not (eq (encode-coding-string s coding nil) s))
|
||||
(should (eq (decode-coding-string s coding t) s))
|
||||
(should (eq (encode-coding-string s coding t) s))))))
|
||||
|
||||
|
||||
(ert-deftest coding-check-coding-systems-region ()
|
||||
(should (equal (check-coding-systems-region "aå" nil '(utf-8))
|
||||
nil))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue