(CHAR_STRING_ADVANCE_NO_UNIFY)

(STRING_CHAR_ADVANCE_NO_UNIFY): New macros. (coding_alloc_by_making_gap): Fix the way to preserve data in the gap. (alloc_destination): Fix the 2nd arg to coding_alloc_by_making_gap. (encode_coding_utf_8): Use CHAR_STRING_ADVANCE_NO_UNIFY instead of CHAR_STRING_ADVANCE. (produce_chars): Fix for the case that the source and the destination are the same buffer. Use CHAR_STRING_ADVANCE_NO_UNIFY instead of CHAR_STRING_ADVANCE. (consume_chars): Use STRING_CHAR_ADVANCE_NO_UNIFY instead of STRING_CHAR_ADVANCE.
2025-12-19 20:30:29 -08:00 · 2008-03-16 01:24:55 +00:00 · 2008-03-16 01:24:55 +00:00 · db274c7a06
commit db274c7a06
parent b2fba0132f
1 changed files with 105 additions and 20 deletions
--- a/src/coding.c
+++ b/src/coding.c
@ -972,6 +972,66 @@ record_conversion_result (struct coding_system *coding,
  } while (0)


+/* Store multibyte form of the character C in P, and advance P to the
+   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
+   never calls MAYBE_UNIFY_CHAR.  */
+
+#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)	\
+  do {						\
+    if ((c) <= MAX_1_BYTE_CHAR)			\
+      *(p)++ = (c);				\
+    else if ((c) <= MAX_2_BYTE_CHAR)		\
+      *(p)++ = (0xC0 | ((c) >> 6)),		\
+	*(p)++ = (0x80 | ((c) & 0x3F));		\
+    else if ((c) <= MAX_3_BYTE_CHAR)		\
+      *(p)++ = (0xE0 | ((c) >> 12)),		\
+	*(p)++ = (0x80 | (((c) >> 6) & 0x3F)),	\
+	*(p)++ = (0x80 | ((c) & 0x3F));		\
+    else if ((c) <= MAX_4_BYTE_CHAR)		\
+      *(p)++ = (0xF0 | (c >> 18)),		\
+	*(p)++ = (0x80 | ((c >> 12) & 0x3F)),	\
+	*(p)++ = (0x80 | ((c >> 6) & 0x3F)),	\
+	*(p)++ = (0x80 | (c & 0x3F));		\
+    else if ((c) <= MAX_5_BYTE_CHAR)		\
+      *(p)++ = 0xF8,				\
+	*(p)++ = (0x80 | ((c >> 18) & 0x0F)),	\
+	*(p)++ = (0x80 | ((c >> 12) & 0x3F)),	\
+	*(p)++ = (0x80 | ((c >> 6) & 0x3F)),	\
+	*(p)++ = (0x80 | (c & 0x3F));		\
+    else					\
+      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);	\
+  } while (0)
+
+
+/* Return the character code of character whose multibyte form is at
+   P, and advance P to the end of the multibyte form.  This is like
+   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.  */
+
+#define STRING_CHAR_ADVANCE_NO_UNIFY(p)				\
+  (!((p)[0] & 0x80)						\
+   ? *(p)++							\
+   : ! ((p)[0] & 0x20)						\
+   ? ((p) += 2,							\
+      ((((p)[-2] & 0x1F) << 6)					\
+       | ((p)[-1] & 0x3F)					\
+       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))	\
+   : ! ((p)[0] & 0x10)						\
+   ? ((p) += 3,							\
+      ((((p)[-3] & 0x0F) << 12)					\
+       | (((p)[-2] & 0x3F) << 6)				\
+       | ((p)[-1] & 0x3F)))					\
+   : ! ((p)[0] & 0x08)						\
+   ? ((p) += 4,							\
+      ((((p)[-4] & 0xF) << 18)					\
+       | (((p)[-3] & 0x3F) << 12)				\
+       | (((p)[-2] & 0x3F) << 6)				\
+       | ((p)[-1] & 0x3F)))					\
+   : ((p) += 5,							\
+      ((((p)[-4] & 0x3F) << 18)					\
+       | (((p)[-3] & 0x3F) << 12)				\
+       | (((p)[-2] & 0x3F) << 6)				\
+       | ((p)[-1] & 0x3F))))
+

 static void
 coding_set_source (coding)
@ -1037,20 +1097,23 @@ coding_alloc_by_realloc (coding, bytes)
 }

 static void
-coding_alloc_by_making_gap (coding, offset, bytes)
+coding_alloc_by_making_gap (coding, gap_head_used, bytes)
     struct coding_system *coding;
-     EMACS_INT offset, bytes;
+     EMACS_INT gap_head_used, bytes;
 {
-  if (BUFFERP (coding->dst_object)
-      && EQ (coding->src_object, coding->dst_object))
+  if (EQ (coding->src_object, coding->dst_object))
    {
-      EMACS_INT add = offset + (coding->src_bytes - coding->consumed);
+      /* The gap may contain the produced data at the head and not-yet
+	 consumed data at the tail.  To preserve those data, we at
+	 first make the gap size to zero, then increase the gap
+	 size.  */
+      EMACS_INT add = GAP_SIZE;

-      GPT += offset, GPT_BYTE += offset;
-      GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
+      GPT += gap_head_used, GPT_BYTE += gap_head_used;
+      GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
      make_gap (bytes);
      GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
-      GPT -= offset, GPT_BYTE -= offset;
+      GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
    }
  else
    {
@ -1073,7 +1136,11 @@ alloc_destination (coding, nbytes, dst)
  EMACS_INT offset = dst - coding->destination;

  if (BUFFERP (coding->dst_object))
-    coding_alloc_by_making_gap (coding, offset, nbytes);
+    {
+      struct buffer *buf = XBUFFER (coding->dst_object);
+
+      coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
+    }
  else
    coding_alloc_by_realloc (coding, nbytes);
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
@ -1365,7 +1432,7 @@ encode_coding_utf_8 (coding)
 	    }
 	  else
 	    {
-	      CHAR_STRING_ADVANCE (c, pend);
+	      CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
 	      for (p = str; p < pend; p++)
 		EMIT_ONE_BYTE (*p);
 	    }
@ -1382,7 +1449,7 @@ encode_coding_utf_8 (coding)
 	  if (CHAR_BYTE8_P (c))
 	    *dst++ = CHAR_TO_BYTE8 (c);
 	  else
-	    dst += CHAR_STRING (c, dst);
+	    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
 	  produced_chars++;
 	}
    }
@ -5971,9 +6038,11 @@ produce_chars (coding, translation_table, last_block)
      int *buf = coding->charbuf;
      int *buf_end = buf + coding->charbuf_used;

-      if (BUFFERP (coding->src_object)
-	  && EQ (coding->src_object, coding->dst_object))
-	dst_end = ((unsigned char *) coding->source) + coding->consumed;
+      if (EQ (coding->src_object, coding->dst_object))
+	{
+	  coding_set_source (coding);
+	  dst_end = ((unsigned char *) coding->source) + coding->consumed;
+	}

      while (buf < buf_end)
 	{
@ -6000,7 +6069,13 @@ produce_chars (coding, translation_table, last_block)
 					   buf_end - buf
 					   + MAX_MULTIBYTE_LENGTH * to_nchars,
 					   dst);
-		  dst_end = coding->destination + coding->dst_bytes;
+		  if (EQ (coding->src_object, coding->dst_object))
+		    {
+		      coding_set_source (coding);
+		      dst_end = ((unsigned char *) coding->source) + coding->consumed;
+		    }
+		  else
+		    dst_end = coding->destination + coding->dst_bytes;
 		}

 	      for (i = 0; i < to_nchars; i++)
@ -6009,7 +6084,7 @@ produce_chars (coding, translation_table, last_block)
 		    c = XINT (AREF (trans, i));
 		  if (coding->dst_multibyte
 		      || ! CHAR_BYTE8_P (c))
-		    CHAR_STRING_ADVANCE (c, dst);
+		    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
 		  else
 		    *dst++ = CHAR_TO_BYTE8 (c);
 		}
@ -6030,6 +6105,8 @@ produce_chars (coding, translation_table, last_block)
      const unsigned char *src = coding->source;
      const unsigned char *src_end = src + coding->consumed;

+      if (EQ (coding->dst_object, coding->src_object))
+	dst_end = (unsigned char *) src;
      if (coding->src_multibyte != coding->dst_multibyte)
 	{
 	  if (coding->src_multibyte)
@ -6057,6 +6134,8 @@ produce_chars (coding, translation_table, last_block)
 			  coding_set_source (coding);
 			  src = coding->source + offset;
 			  src_end = coding->source + coding->src_bytes;
+			  if (EQ (coding->src_object, coding->dst_object))
+			    dst_end = (unsigned char *) src;
 			}
 		    }
 		  *dst++ = c;
@ -6078,13 +6157,19 @@ produce_chars (coding, translation_table, last_block)
 		    if (dst >= dst_end - 1)
 		      {
 			EMACS_INT offset = src - coding->source;
+			EMACS_INT more_bytes;

-			dst = alloc_destination (coding, src_end - src + 2,
-						 dst);
+			if (EQ (coding->src_object, coding->dst_object))
+			  more_bytes = ((src_end - src) / 2) + 2;
+			else
+			  more_bytes = src_end - src + 2;
+			dst = alloc_destination (coding, more_bytes, dst);
 			dst_end = coding->destination + coding->dst_bytes;
 			coding_set_source (coding);
 			src = coding->source + offset;
 			src_end = coding->source + coding->src_bytes;
+			if (EQ (coding->src_object, coding->dst_object))
+			  dst_end = (unsigned char *) src;
 		      }
 		  }
 		EMIT_ONE_BYTE (c);
@ -6572,12 +6657,12 @@ consume_chars (coding, translation_table, max_lookup)
 	  if (coding->encoder == encode_coding_raw_text)
 	    c = *src++, pos++;
 	  else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
-	    c = STRING_CHAR_ADVANCE (src), pos += bytes;
+	    c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
 	  else
 	    c = BYTE8_TO_CHAR (*src), src++, pos++;
 	}
      else
-	c = STRING_CHAR_ADVANCE (src), pos++;
+	c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
      if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
 	c = '\n';
      if (! EQ (eol_type, Qunix))