diff --git a/src/editfns.c b/src/editfns.c index 52a3e4e4266..6ffdd3c3109 100644 --- a/src/editfns.c +++ b/src/editfns.c @@ -4525,7 +4525,8 @@ ring. */) ptrdiff_t len1_byte, len_mid_byte, len2_byte; unsigned char *start1_addr, *start2_addr, *temp; - INTERVAL cur_intv, tmp_interval1, tmp_interval2, tmp_interval3; + INTERVAL cur_intv, tmp_interval1, tmp_interval2; + INTERVAL tmp_interval_mid, tmp_interval3; Lisp_Object buf; XSETBUFFER (buf, current_buffer); @@ -4540,7 +4541,9 @@ ring. */) end2 = XFIXNAT (endr2); gap = GPT; - /* Swap the regions if they're reversed. */ + /* Swap the regions if they're reversed. We do not swap the + corresponding Lisp objects as well, since we reference these only + to clear text properties in both regions. */ if (start2 < end1) { register ptrdiff_t glumph = start1; @@ -4560,28 +4563,6 @@ ring. */) else if ((start1 == end1 || start2 == end2) && end1 == start2) return Qnil; - /* The possibilities are: - 1. Adjacent (contiguous) regions, or separate but equal regions - (no, really equal, in this case!), or - 2. Separate regions of unequal size. - - The worst case is usually No. 2. It means that (aside from - potential need for getting the gap out of the way), there also - needs to be a shifting of the text between the two regions. So - if they are spread far apart, we are that much slower... sigh. */ - - /* It must be pointed out that the really studly thing to do would - be not to move the gap at all, but to leave it in place and work - around it if necessary. This would be extremely efficient, - especially considering that people are likely to do - transpositions near where they are working interactively, which - is exactly where the gap would be found. However, such code - would be much harder to write and to read. So, if you are - reading this comment and are feeling squirrely, by all means have - a go! I just didn't feel like doing it, so I will simply move - the gap the minimum distance to get it out of the way, and then - deal with an unbroken array. */ - start1_byte = CHAR_TO_BYTE (start1); end2_byte = CHAR_TO_BYTE (end2); @@ -4597,6 +4578,22 @@ ring. */) /* Run the before-change-functions *before* we move the gap. */ modify_text (start1, end2); + /* It must be pointed out that the really studly thing to do would + be not to move the gap at all, but to leave it in place and work + around it if necessary. This would be extremely efficient, + especially considering that people are likely to do + transpositions near where they are working interactively, which + is exactly where the gap would be found. However, such code + would be much harder to write and to read. So, if you are + reading this comment and are feeling squirrely, by all means have + a go! I just didn't feel like doing it, so I will simply move + the gap the minimum distance to get it out of the way, and then + deal with an unbroken array. */ + + /* Hmmm... how about checking to see if the gap is large + enough to use as the temporary storage? That would avoid an + allocation... interesting. Later, don't fool with it now. */ + /* Make sure the gap won't interfere, by moving it out of the text we will operate on. */ if (start1 < gap && gap < end2) @@ -4637,16 +4634,36 @@ ring. */) } #endif - /* Hmmm... how about checking to see if the gap is large - enough to use as the temporary storage? That would avoid an - allocation... interesting. Later, don't fool with it now. */ + /* The possibilities are: + 1. Regions of equal size, possibly even adjacent (contiguous). + 2. Regions of unequal size. + + In case 1. we can leave the "mid", that is, the region between the + two regions untouched. + + The worst case is usually No. 2. It means that (aside from + potential need for getting the gap out of the way), there also + needs to be a shifting of the text between the two regions. So + if they are spread far apart, we are that much slower... sigh. */ + + /* As an additional difficulty, we have to carefully consider byte vs. + character semantics: Maintaining undo and text properties needs to + be done in terms of characters, swapping text in memory needs to be + done in terms of bytes. + + Handling case 1. mentioned above in a special way is beneficial + both for undo/text properties and for memory swapping, only we have + to consider case 1. for the character-related bits (len1 == len2) + and case 1. for the byte-related bits (len1_byte == len2_byte) + separately. */ tmp_interval1 = copy_intervals (cur_intv, start1, len1); tmp_interval2 = copy_intervals (cur_intv, start2, len2); - USE_SAFE_ALLOCA; - if (len1_byte == len2_byte && len1 == len2) - /* Regions are same size, though, how nice. */ - /* The char lengths also have to match, for text-properties. */ + + len_mid = start2 - end1; + len_mid_byte = start2_byte - end1_byte; + + if (len1 == len2) { if (end1 == start2) /* Merge the two parts into a single one. */ record_change (start1, (end2 - start1)); @@ -4663,7 +4680,24 @@ ring. */) tmp_interval3 = validate_interval_range (buf, &startr2, &endr2, 0); if (tmp_interval3) set_text_properties_1 (startr2, endr2, Qnil, buf, tmp_interval3); + } + else + /* Regions have different length, character-wise. Handle undo and + text properties for both regions as one long piece of text + spanning both regions and the mid. But while doing so, save the + intervals of the mid to later restore them in their new + position. */ + { + record_change (start1, (end2 - start1)); + tmp_interval_mid = copy_intervals (cur_intv, end1, len_mid); + tmp_interval3 = validate_interval_range (buf, &startr1, &endr2, 0); + if (tmp_interval3) + set_text_properties_1 (startr1, endr2, Qnil, buf, tmp_interval3); + } + USE_SAFE_ALLOCA; + if (len1_byte == len2_byte) + { temp = SAFE_ALLOCA (len1_byte); start1_addr = BYTE_POS_ADDR (start1_byte); start2_addr = BYTE_POS_ADDR (start2_byte); @@ -4671,42 +4705,37 @@ ring. */) memcpy (start1_addr, start2_addr, len2_byte); memcpy (start2_addr, temp, len1_byte); } - else + else if (len1_byte < len2_byte) /* Second region larger than first */ + { + /* holds region 2 */ + temp = SAFE_ALLOCA (len2_byte); + start1_addr = BYTE_POS_ADDR (start1_byte); + start2_addr = BYTE_POS_ADDR (start2_byte); + memcpy (temp, start2_addr, len2_byte); + memcpy (start1_addr + len_mid_byte + len2_byte, start1_addr, len1_byte); + memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte); + memcpy (start1_addr, temp, len2_byte); + } + else + /* Second region smaller than first. */ + { + /* holds region 1 */ + temp = SAFE_ALLOCA (len1_byte); + start1_addr = BYTE_POS_ADDR (start1_byte); + start2_addr = BYTE_POS_ADDR (start2_byte); + memcpy (temp, start1_addr, len1_byte); + memcpy (start1_addr, start2_addr, len2_byte); + memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte); + memcpy (start1_addr + len2_byte + len_mid_byte, temp, len1_byte); + } + SAFE_FREE (); + + if (len1 != len2) + /* Restore intervals of the mid. */ { - len_mid = start2 - end1; - len_mid_byte = start2_byte - end1_byte; - record_change (start1, (end2 - start1)); - INTERVAL tmp_interval_mid = copy_intervals (cur_intv, end1, len_mid); - tmp_interval3 = validate_interval_range (buf, &startr1, &endr2, 0); - if (tmp_interval3) - set_text_properties_1 (startr1, endr2, Qnil, buf, tmp_interval3); - if (len1_byte < len2_byte) /* Second region larger than first */ - { - /* holds region 2 */ - temp = SAFE_ALLOCA (len2_byte); - start1_addr = BYTE_POS_ADDR (start1_byte); - start2_addr = BYTE_POS_ADDR (start2_byte); - memcpy (temp, start2_addr, len2_byte); - memcpy (start1_addr + len_mid_byte + len2_byte, start1_addr, len1_byte); - memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte); - memcpy (start1_addr, temp, len2_byte); - } - else - /* Second region smaller than first. */ - { - /* holds region 1 */ - temp = SAFE_ALLOCA (len1_byte); - start1_addr = BYTE_POS_ADDR (start1_byte); - start2_addr = BYTE_POS_ADDR (start2_byte); - memcpy (temp, start1_addr, len1_byte); - memcpy (start1_addr, start2_addr, len2_byte); - memmove (start1_addr + len2_byte, start1_addr + len1_byte, len_mid_byte); - memcpy (start1_addr + len2_byte + len_mid_byte, temp, len1_byte); - } graft_intervals_into_buffer (tmp_interval_mid, start1 + len2, len_mid, current_buffer, 0); } - SAFE_FREE (); graft_intervals_into_buffer (tmp_interval1, end2 - len1, len1, current_buffer, 0); graft_intervals_into_buffer (tmp_interval2, start1, diff --git a/test/src/editfns-tests.el b/test/src/editfns-tests.el index 2fce2315edb..4e0ca4c9d2a 100644 --- a/test/src/editfns-tests.el +++ b/test/src/editfns-tests.el @@ -190,6 +190,210 @@ (should (equal-including-properties str1 (buffer-substring (+ (point-min) 5) (+ (point-min) 7))))))) +(defconst editfns-tests--transpose-regions-tests + '(;; adjacent regions with one being empty + ("" "foo" "" "" "" [0 3 0 0 0]) + ("" "" "" "baz" "" [0 0 0 3 0]) + + ;; For the following tests, assume that characters from the range + ;; [a-z] are 1 byte long in Emacs's internal text representation, + ;; while LATIN SMALL LETTER [AO] WITH DIAERESIS is 2 bytes long. + + ;; (len1 == len2) && (end1 == start2) && (len1_byte == len2_byte) + ("" "fo(o" "" "b)az" "" [0 3 0 3 0]) + ;; (len1 == len2) && (end1 != start2) && (len1_byte == len2_byte) + ("" "fo(o" "[bar]" "b)az" "" [0 3 3 3 0]) + + ;; (len1 != len2) && (end1 != start2) && (len1_byte < len2_byte) + ("" "fo(o" "[bar]" "baaz)" "" [0 3 3 4 0]) + ;; (len1 != len2) && (end1 != start2) && (len1_byte > len2_byte) + ("" "(fooo" "[bar]" "baz)" "" [0 4 3 3 0]) + + ;; (len1 == len2) && (end1 == start2) && (len1_byte < len2_byte) + ("" "fo(o" "" "b)äz" "" [0 3 0 4 0]) + ;; (len1 == len2) && (end1 == start2) && (len1_byte > len2_byte) + ("" "fo(ö" "" "b)az" "" [0 4 0 3 0]) + ;; (len1 == len2) && (end1 != start2) && (len1_byte > len2_byte) + ("" "fo(o" "[bar]" "b)äz" "" [0 3 3 4 0]) + ;; (len1 == len2) && (end1 != start2) && (len1_byte > len2_byte) + ("" "fo(ö" "[bar]" "b)az" "" [0 4 3 3 0]) + + ;; (len1 != len2) && (end1 == start2) && (len1_byte == len2_byte) + ("" "fo(ö" "" "baaz)" "" [0 4 0 4 0]) + ;; (len1 != len2) && (end1 == start2) && (len1_byte == len2_byte) + ("" "(fooo" "" "bäz)" "" [0 4 0 4 0]) + ;; (len1 != len2) && (end1 != start2) && (len1_byte == len2_byte) + ("" "fo(ö" "[bar]" "baaz)" "" [0 4 3 4 0]) + ;; (len1 != len2) && (end1 != start2) && (len1_byte == len2_byte) + ("" "(fooo" "[bar]" "bäz)" "" [0 4 3 4 0]) + + ;; Going entirely non-ASCII. Assume plain greek small letters are + ;; two bytes long in Emacs's internal text representation, GREEK + ;; SMALL LETTER ALPHA WITH PSILI is three bytes long. + + ;; To cover the initial patch from bug#70122, define a test + ;; consisting of three three-letter strings REG1 MID REG2, with + ;; (length REG1) == (length REG2) but (byte-length REG1) != + ;; (byte-length REG2) ... + ("ἀ(ρχή" "φ[ωω" "β){αρ" "β<ἀ]ζ}" "τέλ>ος" [9 6 6 7 10]) + ;; ... and a test with (length REG1) == (length REG2) and + ;; (byte-length REG1) == (byte-length REG2). + ("ἀ(ρχή" "φ[ωω" "β){αρ" "β<α]ζ}" "τέλ>ος" [9 6 6 6 10]) + + ;; Define the moral equivalent of + ;; `editfns-tests--transpose-equal-but-not'. + (" " "(ab)" "[SPC]" "{é}" " " [1 2 3 2 1]) + + ;; Likewise, for the testcase from bug#70122 in + ;; https://debbugs.gnu.org/cgi/bugreport.cgi?bug=70122#5. + ("" "" "(a):\n[b]: \x2113\x2080\n" "{v}: scaling" "" [0 0 13 10 0]) + + ;; Likewise, for the testcase from bug#70122 in + ;; https://debbugs.gnu.org/cgi/bugreport.cgi?bug=70122#52. + ("(Query replace (default abc → d): )" "abc" "[ → ]" "d" "" [35 3 5 1 0])) + "List of test strings and their markup to test `transpose-regions'. +Each element of this list should be a list + + HEAD REG1 MID REG2 TAIL BYTE-LENGTHS + +where the first five elements are (possibly empty) string snippets and +the sixth element is a five-element vector providing the lengths of the +string snippets, counted in bytes in Emacs's internal text +representation. + +Test `editfns-tests--transpose-regions' inserts the five snippets into +its temporary buffer, adds text properties to them as described for +variable `editfns-tests--transpose-regions-markups', transposes REG1 and +REG2, probably undoes the change, and at each stage ensures that all +involved entities look as expected.") + +(defconst editfns-tests--transpose-regions-markups + '("()" "[]" "{}" "<>") + "List of two-characters strings \"BE\" describing text property markup. +For each element in this list, test `editfns-tests--transpose-regions' +searches once for regular expression \"B.+E\" in its temporary buffer, +adds a text property `markup' with value \"BE\" to the matching text, +and then removes the markup characters B and E around the matching text. + +The test searches in the buffer with all test snippets already inserted, +so characters B and E can originate from different snippets, and the +various B's and E's of different markup items do not need to nest.") + +(ert-deftest editfns-tests--transpose-regions () + "Test function `transpose-regions'. +Execute tests as described by `editfns-tests--transpose-regions-tests'." + (dolist (test editfns-tests--transpose-regions-tests) + (dolist (leave-markers '(nil t)) + (message "test: %S leave-markers: %S" test leave-markers) + (with-temp-buffer + (let ((test (take 5 test)) + (blengthv (nth 5 test)) + (smarkers nil) ; Separator markers. + (pmarkers nil) ; Property markers. + (pmpos nil) ; Their positions before transposing. + (strings nil) ; Net text snippets, propertized. + (blengths nil) ; Their lengths in bytes. + (tstrings nil) ; Net text snippets with REG1/2 transposed. + (test-undo nil) + p beg end beg2 end2) + (buffer-enable-undo) + ;; Insert text snippets. While doing so, create the separator + ;; markers which we need later to determine the net text + ;; snippets. + (cl-assert (eq (length test) 5)) + (setq p test) + (while (cdr p) + (insert (car p)) + (push (point-marker) smarkers) + (setq p (cdr p))) + (insert (car p)) + (setq smarkers (nreverse smarkers)) + ;; Propertize them according to markup, remove markup + ;; characters, add property markers. + (dolist (markup editfns-tests--transpose-regions-markups) + (cl-assert (eq (length markup) 2)) + (goto-char (point-min)) + (when (search-forward-regexp + (concat "\\(" + (regexp-quote (substring markup 0 1)) + ".+" + (regexp-quote (substring markup 1 2)) + "\\)") + nil t) + (setq beg (copy-marker (match-beginning 1)) + end (copy-marker (match-end 1))) + (delete-region beg (1+ beg)) + (delete-region (1- end) end) + (add-text-properties beg end (list 'markup markup)) + (push beg pmarkers) + (push end pmarkers))) + (setq pmarkers (sort pmarkers) + pmpos (mapcar #'marker-position pmarkers)) + ;; Determine net text snippets, plain and with transposed REG1 + ;; and REG2. Determine the byte lengths of the net text + ;; snippets and ensure they meet our expectation. + (setq p smarkers + beg (point-min)) + (while p + (push (buffer-substring beg (car p)) strings) + (push (- (position-bytes (car p)) (position-bytes beg)) + blengths) + (setq beg (car p) p (cdr p))) + (push (buffer-substring beg (point-max)) strings) + (push (- (position-bytes (point-max)) (position-bytes beg)) + blengths) + (setq strings (nreverse strings) + blengths (nreverse blengths)) + (setq tstrings (list (nth 0 strings) (nth 3 strings) + (nth 2 strings) (nth 1 strings) + (nth 4 strings))) + (should (equal blengthv (apply #'vector blengths))) + ;; Transpose REG1 and REG2. Some transpositions might not + ;; generate undo, keep track of that in flag `test-undo'. + (setq beg (+ 1 (length (nth 0 strings))) + end (+ beg (length (nth 1 strings))) + beg2 (+ end (length (nth 2 strings))) + end2 (+ beg2 (length (nth 3 strings)))) + (undo-boundary) + (transpose-regions beg end beg2 end2 leave-markers) + (when (car buffer-undo-list) + (setq test-undo t)) + (undo-boundary) + ;; Check resulting buffer text and its properties. + (should (equal-including-properties + (buffer-string) + (mapconcat #'identity tstrings))) + ;; Check property marker positions. + (if leave-markers + (should (equal (mapcar #'marker-position pmarkers) pmpos)) + ;; Meh. This more or less blindly duplicates function + ;; transpose_markers, since I have been too lazy to + ;; reproduce the arithmetics myself. + (setq pmpos + (mapcar + (lambda (pos) + (cond + ((< pos beg) pos) + ((>= pos end2) pos) + ((< pos end) (+ pos (+ (- end2 beg2) (- beg2 end)))) + ((< pos beg2) (+ pos (- (- end2 beg2) (- end beg)))) + (t (- pos (+ (- end beg) (- beg2 end)))))) + pmpos)) + (should (equal (mapcar #'marker-position pmarkers) pmpos))) + ;; Undo the transposition and check text and properties again, + ;; if needed. This does not undo any marker transpositions as + ;; per the comment before the call to transpose_markers in + ;; Ftranspose_regions, so nothing to check on the marker side + ;; after the undo. + (when test-undo + (undo) + (should (equal-including-properties + (buffer-string) + (mapconcat #'identity strings)))) + ;; Be nice and clean up markers. + (dolist (marker smarkers) (set-marker marker nil)) + (dolist (marker pmarkers) (set-marker marker nil))))))) + (ert-deftest format-c-float () (should-error (format "%c" 0.5)))