(TWO_MORE_BYTES): New macro.

(detect_coding_utf_16): Use TWO_MORE_BYTES instead of ONE_MORE_BYTE.
2025-12-26 07:11:34 -08:00 · 2009-01-14 12:19:44 +00:00 · 2009-01-14 12:19:44 +00:00 · f56a445091
commit f56a445091
parent 97d42150b4
2 changed files with 57 additions and 5 deletions
--- a/src/ChangeLog
+++ b/src/ChangeLog
@ -1,3 +1,9 @@
 2009-01-14  Kenichi Handa  <handa@m17n.org>
 	* coding.c (TWO_MORE_BYTES): New macro.
 	(detect_coding_utf_16): Use TWO_MORE_BYTES instead of
 	ONE_MORE_BYTE.
 2009-01-13  Chong Yidong  <cyd@stupidchicken.com>
 	* font.c (font_clear_prop): If clearing the family, clear the font
@ -90,7 +96,7 @@
 2009-01-07  Kenichi Handa  <handa@m17n.org>
 	* fileio.c (Finsert_file_contents): In the case of replace,
-	remeber the coding system used for decoding in
+	remember the coding system used for decoding in
 	coding_system (Bug#1039).
 	* coding.c (decode_coding_utf_8): Check byte_after_cr before
--- a/src/coding.c
+++ b/src/coding.c
@ -743,6 +743,47 @@ static struct coding_system coding_categories[coding_category_max];
    consumed_chars++;					\
  } while (0)
 /* Safely get two bytes from the source text pointed to by SRC, which
    ends at SRC_END, and set C1 and C2 to those bytes.  If there is no
    byte left for C1, jump to `no_more_source'.  If there is no byte
    left for C2, set C2 to -1.

    If multibytep is nonzero and a fetched byte has its high bit set,
    the byte is accepted only as the head (0xC0 or 0xC1) of a two-byte
    sequence; the sequence is then folded into the single value
    ((HEAD & 1) << 6) | TRAIL.  Any other such byte is rejected: for C1
    both C1 and C2 are set to -1 and nothing more is read; for C2 only
    C2 is set to -1.  A 0xC0/0xC1 head with no trailing byte before
    SRC_END is rejected the same way, so the macro never reads at or
    beyond SRC_END and never advances SRC past it.

    The caller should declare and set these variables appropriately in
    advance:
 	src, src_end, multibytep
    It is intended that this macro is used in detect_coding_utf_16.  */
 #define TWO_MORE_BYTES(c1, c2)				\
   do {							\
     if (src == src_end)					\
       goto no_more_source;				\
     c1 = *src++;					\
     if (multibytep && (c1 & 0x80))			\
       {							\
 	if ((c1 & 0xFE) == 0xC0 && src != src_end)	\
 	  c1 = ((c1 & 1) << 6) | *src++;		\
 	else						\
 	  {						\
 	    c1 = c2 = -1;				\
 	    break;					\
 	  }						\
       }							\
     if (src == src_end)					\
       c2 = -1;						\
     else						\
       {							\
 	c2 = *src++;					\
 	if (multibytep && (c2 & 0x80))			\
 	  {						\
 	    if ((c2 & 0xFE) == 0xC0 && src != src_end)	\
 	      c2 = ((c2 & 1) << 6) | *src++;		\
 	    else					\
 	      c2 = -1;					\
 	  }						\
       }							\
   } while (0)
 #define ONE_MORE_BYTE_NO_CHECK(c)			\
  do {							\
@ -1575,8 +1616,7 @@ detect_coding_utf_16 (coding, detect_info)
      return 0;
    }
-  ONE_MORE_BYTE (c1);
+  TWO_MORE_BYTES (c1, c2);
-  ONE_MORE_BYTE (c2);
  if ((c1 == 0xFF) && (c2 == 0xFE))
    {
      detect_info->found |= (CATEGORY_MASK_UTF_16_LE
@ -1593,6 +1633,11 @@ detect_coding_utf_16 (coding, detect_info)
 				| CATEGORY_MASK_UTF_16_BE_NOSIG
 				| CATEGORY_MASK_UTF_16_LE_NOSIG);
    }
  else if (c1 < 0 || c2 < 0)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_16;
      return 0;
    }
  else
    {
      /* We check the dispersion of Eth and Oth bytes where E is even and
@ -1610,8 +1655,9 @@ detect_coding_utf_16 (coding, detect_info)
      while (1)
 	{
-	  ONE_MORE_BYTE (c1);
+	  TWO_MORE_BYTES (c1, c2);
-	  ONE_MORE_BYTE (c2);
+	  if (c1 < 0 || c2 < 0)
 	    break;
 	  if (! e[c1])
 	    {
 	      e[c1] = 1;