mirror of
git://git.sv.gnu.org/emacs.git
synced 2025-12-26 07:11:34 -08:00
(TWO_MORE_BYTES): New macro.
(detect_coding_utf_16): Use TWO_MORE_BYTES instead of ONE_MORE_BYTE.
This commit is contained in:
parent
97d42150b4
commit
f56a445091
2 changed files with 57 additions and 5 deletions
|
|
@ -1,3 +1,9 @@
|
||||||
|
2009-01-14 Kenichi Handa <handa@m17n.org>
|
||||||
|
|
||||||
|
* coding.c (TWO_MORE_BYTES): New macro.
|
||||||
|
(detect_coding_utf_16): Use TWO_MORE_BYTES instead of
|
||||||
|
ONE_MORE_BYTE.
|
||||||
|
|
||||||
2009-01-13 Chong Yidong <cyd@stupidchicken.com>
|
2009-01-13 Chong Yidong <cyd@stupidchicken.com>
|
||||||
|
|
||||||
* font.c (font_clear_prop): If clearing the family, clear the font
|
* font.c (font_clear_prop): If clearing the family, clear the font
|
||||||
|
|
@ -90,7 +96,7 @@
|
||||||
2009-01-07 Kenichi Handa <handa@m17n.org>
|
2009-01-07 Kenichi Handa <handa@m17n.org>
|
||||||
|
|
||||||
* fileio.c (Finsert_file_contents): In the case of replace,
|
* fileio.c (Finsert_file_contents): In the case of replace,
|
||||||
remember the coding system used for decoding in
|
remember the coding system used for decoding in
|
||||||
coding_system (Bug#1039).
|
coding_system (Bug#1039).
|
||||||
|
|
||||||
* coding.c (decode_coding_utf_8): Check byte_after_cr before
|
* coding.c (decode_coding_utf_8): Check byte_after_cr before
|
||||||
|
|
|
||||||
54
src/coding.c
54
src/coding.c
|
|
@ -743,6 +743,47 @@ static struct coding_system coding_categories[coding_category_max];
|
||||||
consumed_chars++; \
|
consumed_chars++; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
/* Safely get two bytes from the source text pointed to by SRC which ends
|
||||||
|
at SRC_END, and set C1 and C2 to those bytes. If there are not
|
||||||
|
enough bytes in the source for C1, it jumps to `no_more_source'.
|
||||||
|
If there are not enough bytes in the source for C2, set C2 to -1.
|
||||||
|
If multibytep is nonzero and a multibyte character is found at SRC,
|
||||||
|
set C1 and/or C2 to -1, unless the lead byte is 0xC0 or 0xC1, in which case the two-byte sequence is combined into a single byte value. The
|
||||||
|
caller should declare and set these variables appropriately in
|
||||||
|
advance:
|
||||||
|
src, src_end, multibytep
|
||||||
|
It is intended that this macro is used in detect_coding_utf_16. */
|
||||||
|
|
||||||
|
#define TWO_MORE_BYTES(c1, c2) \
|
||||||
|
do { \
|
||||||
|
if (src == src_end) \
|
||||||
|
goto no_more_source; \
|
||||||
|
c1 = *src++; \
|
||||||
|
if (multibytep && (c1 & 0x80)) \
|
||||||
|
{ \
|
||||||
|
if ((c1 & 0xFE) == 0xC0) \
|
||||||
|
c1 = ((c1 & 1) << 6) | *src++; \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
c1 = c2 = -1; \
|
||||||
|
break; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
if (src == src_end) \
|
||||||
|
c2 = -1; \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
c2 = *src++; \
|
||||||
|
if (multibytep && (c2 & 0x80)) \
|
||||||
|
{ \
|
||||||
|
if ((c2 & 0xFE) == 0xC0) \
|
||||||
|
c2 = ((c2 & 1) << 6) | *src++; \
|
||||||
|
else \
|
||||||
|
c2 = -1; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
#define ONE_MORE_BYTE_NO_CHECK(c) \
|
#define ONE_MORE_BYTE_NO_CHECK(c) \
|
||||||
do { \
|
do { \
|
||||||
|
|
@ -1575,8 +1616,7 @@ detect_coding_utf_16 (coding, detect_info)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
ONE_MORE_BYTE (c1);
|
TWO_MORE_BYTES (c1, c2);
|
||||||
ONE_MORE_BYTE (c2);
|
|
||||||
if ((c1 == 0xFF) && (c2 == 0xFE))
|
if ((c1 == 0xFF) && (c2 == 0xFE))
|
||||||
{
|
{
|
||||||
detect_info->found |= (CATEGORY_MASK_UTF_16_LE
|
detect_info->found |= (CATEGORY_MASK_UTF_16_LE
|
||||||
|
|
@ -1593,6 +1633,11 @@ detect_coding_utf_16 (coding, detect_info)
|
||||||
| CATEGORY_MASK_UTF_16_BE_NOSIG
|
| CATEGORY_MASK_UTF_16_BE_NOSIG
|
||||||
| CATEGORY_MASK_UTF_16_LE_NOSIG);
|
| CATEGORY_MASK_UTF_16_LE_NOSIG);
|
||||||
}
|
}
|
||||||
|
else if (c1 < 0 || c2 < 0)
|
||||||
|
{
|
||||||
|
detect_info->rejected |= CATEGORY_MASK_UTF_16;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* We check the dispersion of Eth and Oth bytes where E is even and
|
/* We check the dispersion of Eth and Oth bytes where E is even and
|
||||||
|
|
@ -1610,8 +1655,9 @@ detect_coding_utf_16 (coding, detect_info)
|
||||||
|
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
ONE_MORE_BYTE (c1);
|
TWO_MORE_BYTES (c1, c2);
|
||||||
ONE_MORE_BYTE (c2);
|
if (c1 < 0 || c2 < 0)
|
||||||
|
break;
|
||||||
if (! e[c1])
|
if (! e[c1])
|
||||||
{
|
{
|
||||||
e[c1] = 1;
|
e[c1] = 1;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue