1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2025-12-06 06:20:55 -08:00

text-index.c: Skip the byte scan when it's all ASCII

* src/text-index.c (is_close_enough_charpos): Remove unused arg `ti`.
(narrow_bytepos_bounds_1, narrow_charpos_bounds_1)
(narrow_bytepos_bounds, narrow_charpos_bounds):
Don't short-circuit if a known point is exactly equal to what we're
looking for, thus don't return value.
(text_index_bytepos_to_charpos, text_index_charpos_to_bytepos):
Short-circuit here instead after narrowing.
Make the two functions more alike.  Short-circuit also when the text
remaining to scan is all ASCII.
This commit is contained in:
Stefan Monnier 2025-04-22 14:12:00 -04:00
parent 0b780f8619
commit c8beb5f023

View file

@ -91,7 +91,9 @@ struct text_index
enum
{
/* Number of bytes in an interval. */
/* Number of bytes in an interval.
Tradeoff between cost of the text-index array and cost of scanning
bytes between the positions recorded in the array. */
TEXT_INDEX_INTERVAL = 4096,
/* Default capacity in number of intervals for text indices. */
@ -138,8 +140,7 @@ pt_pos (const struct buffer *b)
necessary. */
static bool
is_close_enough_charpos (const struct text_index *ti,
ptrdiff_t charpos,
is_close_enough_charpos (ptrdiff_t charpos,
const struct text_pos pos)
{
return eabs (charpos - pos.charpos) < TEXT_INDEX_INTERVAL / 4;
@ -503,103 +504,77 @@ next_known_text_pos (struct buffer *b, ptrdiff_t entry)
}
/* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer
to BYTEPOS. If KNOWN is an exact match for BYTEPOS return true. */
to BYTEPOS. */
static bool
static void
narrow_bytepos_bounds_1 (const struct text_pos known, struct text_pos *prev,
struct text_pos *next, const ptrdiff_t bytepos)
{
eassert (bytepos >= prev->bytepos && bytepos <= next->bytepos);
eassert (known.bytepos != TEXT_INDEX_INVALID_POSITION);
if (known.bytepos == bytepos)
return true;
/* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */
if (known.bytepos < bytepos
if (known.bytepos <= bytepos
&& known.bytepos > prev->bytepos)
*prev = known;
/* If KNOWN is in [BYTEPOS NEXT) it is a better NEXT. */
if (known.bytepos > bytepos
if (known.bytepos >= bytepos
&& known.bytepos < next->bytepos)
*next = known;
return false;
}
/* Improve the known bytepos bounds *PREV and *NEXT of buffer B using
known positions in B. BYTEPOS is a byte position to convert to a
character position. If an exact match for BYTEPOS is found, return
its charpos, otherwise return TEXT_INDEX_INVALID_POSITION. */
character position. */
static ptrdiff_t
static void
narrow_bytepos_bounds (struct buffer *b, struct text_pos *prev,
struct text_pos *next, const ptrdiff_t bytepos)
{
const struct text_pos pt = pt_pos (b);
if (narrow_bytepos_bounds_1 (pt, prev, next, bytepos))
return pt.charpos;
const struct text_pos gpt = gpt_pos (b);
if (narrow_bytepos_bounds_1 (gpt, prev, next, bytepos))
return gpt.charpos;
narrow_bytepos_bounds_1 (pt_pos (b), prev, next, bytepos);
narrow_bytepos_bounds_1 (gpt_pos (b), prev, next, bytepos);
struct text_index *ti = b->text->index;
if (is_cache_valid (ti)
&& narrow_bytepos_bounds_1 (ti->cache, prev, next, bytepos))
return ti->cache.charpos;
return TEXT_INDEX_INVALID_POSITION;
if (is_cache_valid (ti))
narrow_bytepos_bounds_1 (ti->cache, prev, next, bytepos);
}
/* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer
to BYTEPOS. If KNOWN is an exact match for BYTEPOS return true. */
to BYTEPOS. */
static bool
static void
narrow_charpos_bounds_1 (const struct text_pos known, struct text_pos *prev,
struct text_pos *next, const ptrdiff_t charpos)
{
eassert (charpos >= prev->charpos && charpos <= next->charpos);
eassert (known.charpos != TEXT_INDEX_INVALID_POSITION);
if (known.charpos == charpos)
return true;
/* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */
if (known.charpos < charpos
if (known.charpos <= charpos
&& known.charpos > prev->charpos)
*prev = known;
/* If KNOWN is in [BYTEPOS NEXT) it is a better NEXT. */
if (known.charpos > charpos
if (known.charpos >= charpos
&& known.charpos < next->charpos)
*next = known;
return false;
}
/* Improve the known bytepos bounds *PREV and *NEXT of buffer B using
known positions in B. BYTEPOS is a byte position to convert to a
character position. If an exact match for BYTEPOS is found, return
its charpos, otherwise return TEXT_INDEX_INVALID_POSITION. */
character position. */
static ptrdiff_t
static void
narrow_charpos_bounds (struct buffer *b, struct text_pos *prev,
struct text_pos *next, const ptrdiff_t charpos)
{
const struct text_pos pt = pt_pos (b);
if (narrow_charpos_bounds_1 (pt, prev, next, charpos))
return pt.bytepos;
const struct text_pos gpt = gpt_pos (b);
if (narrow_charpos_bounds_1 (gpt, prev, next, charpos))
return gpt.bytepos;
narrow_charpos_bounds_1 (pt_pos (b), prev, next, charpos);
narrow_charpos_bounds_1 (gpt_pos (b), prev, next, charpos);
struct text_index *ti = b->text->index;
if (is_cache_valid (ti)
&& narrow_charpos_bounds_1 (ti->cache, prev, next, charpos))
return ti->cache.bytepos;
return TEXT_INDEX_INVALID_POSITION;
if (is_cache_valid (ti))
narrow_charpos_bounds_1 (ti->cache, prev, next, charpos);
}
/* Return the character position in buffer B corresponding to
@ -608,6 +583,7 @@ narrow_charpos_bounds (struct buffer *b, struct text_pos *prev,
ptrdiff_t
buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
{
/* FIXME: Can BYTEPOS ever be outside of BEGV_BYTE..ZV_BYTE? */
/* If this buffer has as many characters as bytes, each character must
be one byte. This takes care of the case where
enable-multibyte-characters is nil. */
@ -615,30 +591,39 @@ buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
if (z.charpos == z.bytepos)
return bytepos;
/* BYTEPOS == Z_BYTE, and BYTEPOS is an interval boundary,
then BYTEPOS does not have an index entry because we don't want
extra entries for (Z, Z_BYTE). Changing that would be possible
but leads to more code than this if-statement, so it's probably
not worth it. */
if (bytepos == z.bytepos)
return z.charpos;
/* Begin with the interval (BEG, Z), and improve on that by taking known
positions into account like PT, GPT and the cache. This might
already find the answer. */
struct text_index *ti = ensure_has_index (b);
struct text_pos prev = beg_pos (b);
struct text_pos next = z;
narrow_bytepos_bounds (b, &prev, &next, bytepos);
/* Z_BYTE does not have an index entry because we don't want
extra entries for (Z, Z_BYTE), so short-circuit *before* looking
up the index. Changing that would be possible but leads to more
code than this if-statement, so it's probably not worth it. */
if (next.bytepos == bytepos)
return next.charpos;
ensure_bytepos_indexed (b, bytepos);
struct text_index *ti = b->text->index;
const ptrdiff_t entry = index_bytepos_entry (ti, bytepos);
struct text_pos prev = index_text_pos (ti, entry);
struct text_pos next = next_known_text_pos (b, entry);
narrow_bytepos_bounds_1 (index_text_pos (ti, entry), &prev, &next, bytepos);
narrow_bytepos_bounds_1 (next_known_text_pos (b, entry),
&prev, &next, bytepos);
ptrdiff_t charpos = narrow_bytepos_bounds (b, &prev, &next, bytepos);
if (charpos != TEXT_INDEX_INVALID_POSITION)
return charpos;
if (next.charpos - prev.charpos == next.bytepos - prev.bytepos
/* Beware: NEXT and PREV can be in the middle of multibyte chars! */
&& CHAR_HEAD_P (BUF_FETCH_BYTE (b, prev.bytepos)))
return prev.charpos + (bytepos - prev.bytepos); /* ASCII-only! */
/* Scan forward if the distance to the previous known position is
smaller than the distance to the next known position. */
if (bytepos - prev.bytepos < next.bytepos - bytepos)
charpos = charpos_forward_to_bytepos (b, prev, bytepos);
else
charpos = charpos_backward_to_bytepos (b, next, bytepos);
ptrdiff_t charpos
= (bytepos - prev.bytepos < next.bytepos - bytepos)
? charpos_forward_to_bytepos (b, prev, bytepos)
: charpos_backward_to_bytepos (b, next, bytepos);
cache (ti, charpos, bytepos);
return charpos;
@ -650,6 +635,7 @@ buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
ptrdiff_t
buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
{
/* FIXME: Can CHARPOS ever be outside of BEGV..ZV? */
/* If this buffer has as many characters as bytes, each character must
be one byte. This takes care of the case where
enable-multibyte-characters is nil. */
@ -657,25 +643,24 @@ buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
if (z.charpos == z.bytepos)
return charpos;
if (charpos == z.charpos)
return z.bytepos;
ensure_charpos_indexed (b, charpos);
/* Begin with the interval (BEG, Z), and improve on that by taking known
positions into account like PT, GPT and the cache. This might
already find the bytepos. */
already find the answer. */
struct text_index *ti = ensure_has_index (b);
struct text_pos prev = beg_pos (b);
struct text_pos next = z;
ptrdiff_t bytepos = narrow_charpos_bounds (b, &prev, &next, charpos);
if (bytepos != TEXT_INDEX_INVALID_POSITION)
return bytepos;
narrow_charpos_bounds (b, &prev, &next, charpos);
if (next.charpos - prev.charpos == next.bytepos - prev.bytepos)
return prev.bytepos + (charpos - prev.charpos); /* ASCII-only! */
else if (next.charpos == charpos)
return next.bytepos;
/* If one of the bounds is already good enough, avoid consulting
the index since that involves some overhead. */
if (!is_close_enough_charpos (ti, charpos, prev)
&& !is_close_enough_charpos (ti, charpos, next))
if (!is_close_enough_charpos (charpos, prev)
&& !is_close_enough_charpos (charpos, next))
{
ensure_charpos_indexed (b, charpos);
const ptrdiff_t entry = index_charpos_entry (ti, charpos);
@ -683,19 +668,23 @@ buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
narrow_charpos_bounds_1 (index_prev, &prev, &next, charpos);
const struct text_pos index_next = next_known_text_pos (b, entry);
narrow_charpos_bounds_1 (index_next, &prev, &next, charpos);
if (next.charpos - prev.charpos == next.bytepos - prev.bytepos
/* Beware: NEXT and PREV can be in the middle of multibyte chars! */
&& CHAR_HEAD_P (BUF_FETCH_BYTE (b, prev.bytepos))
&& CHAR_HEAD_P (BUF_FETCH_BYTE (b, next.bytepos - 1)))
return prev.bytepos + (charpos - prev.charpos); /* ASCII-only! */
}
/* Don't scan forward if CHARPOS is exactly on the previous know
position because the index bytepos can be in the middle of a
character, which is found by scanning backwards. Otherwise, scan
forward if we believe that's less expensive. */
if (charpos > prev.charpos
ptrdiff_t bytepos
= (charpos > prev.charpos
&& charpos - prev.charpos < next.charpos - charpos)
bytepos = bytepos_forward_to_charpos (b, prev, charpos);
else
bytepos = bytepos_backward_to_charpos (b, next, charpos);
? bytepos_forward_to_charpos (b, prev, charpos)
: bytepos_backward_to_charpos (b, next, charpos);
cache (ti, charpos, bytepos);
return bytepos;