1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2025-12-06 06:20:55 -08:00

text-index.c: Skip the byte scan when it's all ASCII

* src/text-index.c (is_close_enough_charpos): Remove unused arg `ti`.
(narrow_bytepos_bounds_1, narrow_charpos_bounds_1)
(narrow_bytepos_bounds, narrow_charpos_bounds):
Don't short-circuit if a known point is exactly equal to what we're
looking for, and thus don't return a value.
(buf_bytepos_to_charpos, buf_charpos_to_bytepos):
Short-circuit here instead, after narrowing.
Make the two functions more alike.  Short-circuit also when the text
remaining to scan is all ASCII.
This commit is contained in:
Stefan Monnier 2025-04-22 14:12:00 -04:00
parent 0b780f8619
commit c8beb5f023

View file

@ -91,7 +91,9 @@ struct text_index
enum enum
{ {
/* Number of bytes in an interval. */ /* Number of bytes in an interval.
Tradeoff between cost of the text-index array and cost of scanning
bytes between the positions recorded in the array. */
TEXT_INDEX_INTERVAL = 4096, TEXT_INDEX_INTERVAL = 4096,
/* Default capacity in number of intervals for text indices. */ /* Default capacity in number of intervals for text indices. */
@ -138,8 +140,7 @@ pt_pos (const struct buffer *b)
necessary. */ necessary. */
static bool static bool
is_close_enough_charpos (const struct text_index *ti, is_close_enough_charpos (ptrdiff_t charpos,
ptrdiff_t charpos,
const struct text_pos pos) const struct text_pos pos)
{ {
return eabs (charpos - pos.charpos) < TEXT_INDEX_INTERVAL / 4; return eabs (charpos - pos.charpos) < TEXT_INDEX_INTERVAL / 4;
@ -503,103 +504,77 @@ next_known_text_pos (struct buffer *b, ptrdiff_t entry)
} }
/* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer /* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer
to BYTEPOS. If KNOWN is an exact match for BYTEPOS return true. */ to BYTEPOS. */
static bool static void
narrow_bytepos_bounds_1 (const struct text_pos known, struct text_pos *prev, narrow_bytepos_bounds_1 (const struct text_pos known, struct text_pos *prev,
struct text_pos *next, const ptrdiff_t bytepos) struct text_pos *next, const ptrdiff_t bytepos)
{ {
eassert (bytepos >= prev->bytepos && bytepos <= next->bytepos); eassert (bytepos >= prev->bytepos && bytepos <= next->bytepos);
eassert (known.bytepos != TEXT_INDEX_INVALID_POSITION); eassert (known.bytepos != TEXT_INDEX_INVALID_POSITION);
if (known.bytepos == bytepos)
return true;
/* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */ /* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */
if (known.bytepos < bytepos if (known.bytepos <= bytepos
&& known.bytepos > prev->bytepos) && known.bytepos > prev->bytepos)
*prev = known; *prev = known;
/* If KNOWN is in [BYTEPOS, NEXT) it is a better NEXT. */ /* If KNOWN is in [BYTEPOS, NEXT) it is a better NEXT. */
if (known.bytepos > bytepos if (known.bytepos >= bytepos
&& known.bytepos < next->bytepos) && known.bytepos < next->bytepos)
*next = known; *next = known;
return false;
} }
/* Improve the known bytepos bounds *PREV and *NEXT of buffer B using /* Improve the known bytepos bounds *PREV and *NEXT of buffer B using
known positions in B. BYTEPOS is a byte position to convert to a known positions in B. BYTEPOS is a byte position to convert to a
character position. If an exact match for BYTEPOS is found, return character position. */
its charpos, otherwise return TEXT_INDEX_INVALID_POSITION. */
static ptrdiff_t static void
narrow_bytepos_bounds (struct buffer *b, struct text_pos *prev, narrow_bytepos_bounds (struct buffer *b, struct text_pos *prev,
struct text_pos *next, const ptrdiff_t bytepos) struct text_pos *next, const ptrdiff_t bytepos)
{ {
const struct text_pos pt = pt_pos (b); narrow_bytepos_bounds_1 (pt_pos (b), prev, next, bytepos);
if (narrow_bytepos_bounds_1 (pt, prev, next, bytepos)) narrow_bytepos_bounds_1 (gpt_pos (b), prev, next, bytepos);
return pt.charpos;
const struct text_pos gpt = gpt_pos (b);
if (narrow_bytepos_bounds_1 (gpt, prev, next, bytepos))
return gpt.charpos;
struct text_index *ti = b->text->index; struct text_index *ti = b->text->index;
if (is_cache_valid (ti) if (is_cache_valid (ti))
&& narrow_bytepos_bounds_1 (ti->cache, prev, next, bytepos)) narrow_bytepos_bounds_1 (ti->cache, prev, next, bytepos);
return ti->cache.charpos;
return TEXT_INDEX_INVALID_POSITION;
} }
/* Improve the known charpos bounds *PREV and *NEXT if KNOWN is closer /* Improve the known charpos bounds *PREV and *NEXT if KNOWN is closer
to CHARPOS. If KNOWN is an exact match for CHARPOS return true. */ to CHARPOS. */
static bool static void
narrow_charpos_bounds_1 (const struct text_pos known, struct text_pos *prev, narrow_charpos_bounds_1 (const struct text_pos known, struct text_pos *prev,
struct text_pos *next, const ptrdiff_t charpos) struct text_pos *next, const ptrdiff_t charpos)
{ {
eassert (charpos >= prev->charpos && charpos <= next->charpos); eassert (charpos >= prev->charpos && charpos <= next->charpos);
eassert (known.charpos != TEXT_INDEX_INVALID_POSITION); eassert (known.charpos != TEXT_INDEX_INVALID_POSITION);
if (known.charpos == charpos)
return true;
/* If KNOWN is in (PREV, CHARPOS] it is a better PREV. */ /* If KNOWN is in (PREV, CHARPOS] it is a better PREV. */
if (known.charpos < charpos if (known.charpos <= charpos
&& known.charpos > prev->charpos) && known.charpos > prev->charpos)
*prev = known; *prev = known;
/* If KNOWN is in [CHARPOS, NEXT) it is a better NEXT. */ /* If KNOWN is in [CHARPOS, NEXT) it is a better NEXT. */
if (known.charpos > charpos if (known.charpos >= charpos
&& known.charpos < next->charpos) && known.charpos < next->charpos)
*next = known; *next = known;
return false;
} }
/* Improve the known charpos bounds *PREV and *NEXT of buffer B using /* Improve the known charpos bounds *PREV and *NEXT of buffer B using
known positions in B. CHARPOS is a character position to convert to a known positions in B. CHARPOS is a character position to convert to a
byte position. If an exact match for CHARPOS is found, return byte position. */
its bytepos, otherwise return TEXT_INDEX_INVALID_POSITION. */
static ptrdiff_t static void
narrow_charpos_bounds (struct buffer *b, struct text_pos *prev, narrow_charpos_bounds (struct buffer *b, struct text_pos *prev,
struct text_pos *next, const ptrdiff_t charpos) struct text_pos *next, const ptrdiff_t charpos)
{ {
const struct text_pos pt = pt_pos (b); narrow_charpos_bounds_1 (pt_pos (b), prev, next, charpos);
if (narrow_charpos_bounds_1 (pt, prev, next, charpos)) narrow_charpos_bounds_1 (gpt_pos (b), prev, next, charpos);
return pt.bytepos;
const struct text_pos gpt = gpt_pos (b);
if (narrow_charpos_bounds_1 (gpt, prev, next, charpos))
return gpt.bytepos;
struct text_index *ti = b->text->index; struct text_index *ti = b->text->index;
if (is_cache_valid (ti) if (is_cache_valid (ti))
&& narrow_charpos_bounds_1 (ti->cache, prev, next, charpos)) narrow_charpos_bounds_1 (ti->cache, prev, next, charpos);
return ti->cache.bytepos;
return TEXT_INDEX_INVALID_POSITION;
} }
/* Return the character position in buffer B corresponding to /* Return the character position in buffer B corresponding to
@ -608,6 +583,7 @@ narrow_charpos_bounds (struct buffer *b, struct text_pos *prev,
ptrdiff_t ptrdiff_t
buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos) buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
{ {
/* FIXME: Can BYTEPOS ever be outside of BEGV_BYTE..ZV_BYTE? */
/* If this buffer has as many characters as bytes, each character must /* If this buffer has as many characters as bytes, each character must
be one byte. This takes care of the case where be one byte. This takes care of the case where
enable-multibyte-characters is nil. */ enable-multibyte-characters is nil. */
@ -615,30 +591,39 @@ buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
if (z.charpos == z.bytepos) if (z.charpos == z.bytepos)
return bytepos; return bytepos;
/* BYTEPOS == Z_BYTE, and BYTEPOS is an interval boundary, /* Begin with the interval (BEG, Z), and improve on that by taking known
then BYTEPOS does not have an index entry because we don't want positions into account like PT, GPT and the cache. This might
extra entries for (Z, Z_BYTE). Changing that would be possible already find the answer. */
but leads to more code than this if-statement, so it's probably struct text_index *ti = ensure_has_index (b);
not worth it. */ struct text_pos prev = beg_pos (b);
if (bytepos == z.bytepos) struct text_pos next = z;
return z.charpos;
narrow_bytepos_bounds (b, &prev, &next, bytepos);
/* Z_BYTE does not have an index entry because we don't want
extra entries for (Z, Z_BYTE), so short-circuit *before* looking
up the index. Changing that would be possible but leads to more
code than this if-statement, so it's probably not worth it. */
if (next.bytepos == bytepos)
return next.charpos;
ensure_bytepos_indexed (b, bytepos); ensure_bytepos_indexed (b, bytepos);
struct text_index *ti = b->text->index;
const ptrdiff_t entry = index_bytepos_entry (ti, bytepos); const ptrdiff_t entry = index_bytepos_entry (ti, bytepos);
struct text_pos prev = index_text_pos (ti, entry); narrow_bytepos_bounds_1 (index_text_pos (ti, entry), &prev, &next, bytepos);
struct text_pos next = next_known_text_pos (b, entry); narrow_bytepos_bounds_1 (next_known_text_pos (b, entry),
&prev, &next, bytepos);
ptrdiff_t charpos = narrow_bytepos_bounds (b, &prev, &next, bytepos); if (next.charpos - prev.charpos == next.bytepos - prev.bytepos
if (charpos != TEXT_INDEX_INVALID_POSITION) /* Beware: NEXT and PREV can be in the middle of multibyte chars! */
return charpos; && CHAR_HEAD_P (BUF_FETCH_BYTE (b, prev.bytepos)))
return prev.charpos + (bytepos - prev.bytepos); /* ASCII-only! */
/* Scan forward if the distance to the previous known position is /* Scan forward if the distance to the previous known position is
smaller than the distance to the next known position. */ smaller than the distance to the next known position. */
if (bytepos - prev.bytepos < next.bytepos - bytepos) ptrdiff_t charpos
charpos = charpos_forward_to_bytepos (b, prev, bytepos); = (bytepos - prev.bytepos < next.bytepos - bytepos)
else ? charpos_forward_to_bytepos (b, prev, bytepos)
charpos = charpos_backward_to_bytepos (b, next, bytepos); : charpos_backward_to_bytepos (b, next, bytepos);
cache (ti, charpos, bytepos); cache (ti, charpos, bytepos);
return charpos; return charpos;
@ -650,6 +635,7 @@ buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
ptrdiff_t ptrdiff_t
buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos) buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
{ {
/* FIXME: Can CHARPOS ever be outside of BEGV..ZV? */
/* If this buffer has as many characters as bytes, each character must /* If this buffer has as many characters as bytes, each character must
be one byte. This takes care of the case where be one byte. This takes care of the case where
enable-multibyte-characters is nil. */ enable-multibyte-characters is nil. */
@ -657,25 +643,24 @@ buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
if (z.charpos == z.bytepos) if (z.charpos == z.bytepos)
return charpos; return charpos;
if (charpos == z.charpos)
return z.bytepos;
ensure_charpos_indexed (b, charpos);
/* Begin with the interval (BEG, Z), and improve on that by taking known /* Begin with the interval (BEG, Z), and improve on that by taking known
positions into account like PT, GPT and the cache. This might positions into account like PT, GPT and the cache. This might
already find the bytepos. */ already find the answer. */
struct text_index *ti = ensure_has_index (b); struct text_index *ti = ensure_has_index (b);
struct text_pos prev = beg_pos (b); struct text_pos prev = beg_pos (b);
struct text_pos next = z; struct text_pos next = z;
ptrdiff_t bytepos = narrow_charpos_bounds (b, &prev, &next, charpos); narrow_charpos_bounds (b, &prev, &next, charpos);
if (bytepos != TEXT_INDEX_INVALID_POSITION)
return bytepos; if (next.charpos - prev.charpos == next.bytepos - prev.bytepos)
return prev.bytepos + (charpos - prev.charpos); /* ASCII-only! */
else if (next.charpos == charpos)
return next.bytepos;
/* If one of the bounds is already good enough, avoid consulting /* If one of the bounds is already good enough, avoid consulting
the index since that involves some overhead. */ the index since that involves some overhead. */
if (!is_close_enough_charpos (ti, charpos, prev) if (!is_close_enough_charpos (charpos, prev)
&& !is_close_enough_charpos (ti, charpos, next)) && !is_close_enough_charpos (charpos, next))
{ {
ensure_charpos_indexed (b, charpos); ensure_charpos_indexed (b, charpos);
const ptrdiff_t entry = index_charpos_entry (ti, charpos); const ptrdiff_t entry = index_charpos_entry (ti, charpos);
@ -683,19 +668,23 @@ buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
narrow_charpos_bounds_1 (index_prev, &prev, &next, charpos); narrow_charpos_bounds_1 (index_prev, &prev, &next, charpos);
const struct text_pos index_next = next_known_text_pos (b, entry); const struct text_pos index_next = next_known_text_pos (b, entry);
narrow_charpos_bounds_1 (index_next, &prev, &next, charpos); narrow_charpos_bounds_1 (index_next, &prev, &next, charpos);
if (next.charpos - prev.charpos == next.bytepos - prev.bytepos
/* Beware: NEXT and PREV can be in the middle of multibyte chars! */
&& CHAR_HEAD_P (BUF_FETCH_BYTE (b, prev.bytepos))
&& CHAR_HEAD_P (BUF_FETCH_BYTE (b, next.bytepos - 1)))
return prev.bytepos + (charpos - prev.charpos); /* ASCII-only! */
} }
/* Don't scan forward if CHARPOS is exactly on the previous known /* Don't scan forward if CHARPOS is exactly on the previous known
position because the index bytepos can be in the middle of a position because the index bytepos can be in the middle of a
character, which is found by scanning backwards. Otherwise, scan character, which is found by scanning backwards. Otherwise, scan
forward if we believe that's less expensive. */ forward if we believe that's less expensive. */
if (charpos > prev.charpos ptrdiff_t bytepos
= (charpos > prev.charpos
&& charpos - prev.charpos < next.charpos - charpos) && charpos - prev.charpos < next.charpos - charpos)
bytepos = bytepos_forward_to_charpos (b, prev, charpos); ? bytepos_forward_to_charpos (b, prev, charpos)
else : bytepos_backward_to_charpos (b, next, charpos);
bytepos = bytepos_backward_to_charpos (b, next, charpos);
cache (ti, charpos, bytepos); cache (ti, charpos, bytepos);
return bytepos; return bytepos;