mirror of
git://git.sv.gnu.org/emacs.git
synced 2025-12-06 06:20:55 -08:00
text-index.c: Skip the byte scan when it's all ASCII
* src/text-index.c (is_close_enough_charpos): Remove unused arg `ti`. (narrow_bytepos_bounds_1, narrow_charpos_bounds_1) (narrow_bytepos_bounds, narrow_charpos_bounds): Don't short-circuit if a known point is exactly equal to what we're looking for, thus don't return value. (text_index_bytepos_to_charpos, text_index_charpos_to_bytepos): Short-circuit here instead after narrowing. Make the two functions more alike. Short-circuit also when the text remaining to scan is all ASCII.
This commit is contained in:
parent
0b780f8619
commit
c8beb5f023
1 changed files with 74 additions and 85 deletions
155
src/text-index.c
155
src/text-index.c
|
|
@ -91,7 +91,9 @@ struct text_index
|
||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
/* Number of bytes in an interval. */
|
/* Number of bytes in an interval.
|
||||||
|
Tradeoff between cost of the text-index array and cost of scanning
|
||||||
|
bytes between the positions recorded in the array. */
|
||||||
TEXT_INDEX_INTERVAL = 4096,
|
TEXT_INDEX_INTERVAL = 4096,
|
||||||
|
|
||||||
/* Default capacity in number of intervals for text indices. */
|
/* Default capacity in number of intervals for text indices. */
|
||||||
|
|
@ -138,8 +140,7 @@ pt_pos (const struct buffer *b)
|
||||||
necessary. */
|
necessary. */
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
is_close_enough_charpos (const struct text_index *ti,
|
is_close_enough_charpos (ptrdiff_t charpos,
|
||||||
ptrdiff_t charpos,
|
|
||||||
const struct text_pos pos)
|
const struct text_pos pos)
|
||||||
{
|
{
|
||||||
return eabs (charpos - pos.charpos) < TEXT_INDEX_INTERVAL / 4;
|
return eabs (charpos - pos.charpos) < TEXT_INDEX_INTERVAL / 4;
|
||||||
|
|
@ -503,103 +504,77 @@ next_known_text_pos (struct buffer *b, ptrdiff_t entry)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer
|
/* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer
|
||||||
to BYTEPOS. If KNOWN is an exact match for BYTEPOS return true. */
|
to BYTEPOS. */
|
||||||
|
|
||||||
static bool
|
static void
|
||||||
narrow_bytepos_bounds_1 (const struct text_pos known, struct text_pos *prev,
|
narrow_bytepos_bounds_1 (const struct text_pos known, struct text_pos *prev,
|
||||||
struct text_pos *next, const ptrdiff_t bytepos)
|
struct text_pos *next, const ptrdiff_t bytepos)
|
||||||
{
|
{
|
||||||
eassert (bytepos >= prev->bytepos && bytepos <= next->bytepos);
|
eassert (bytepos >= prev->bytepos && bytepos <= next->bytepos);
|
||||||
eassert (known.bytepos != TEXT_INDEX_INVALID_POSITION);
|
eassert (known.bytepos != TEXT_INDEX_INVALID_POSITION);
|
||||||
if (known.bytepos == bytepos)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
/* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */
|
/* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */
|
||||||
if (known.bytepos < bytepos
|
if (known.bytepos <= bytepos
|
||||||
&& known.bytepos > prev->bytepos)
|
&& known.bytepos > prev->bytepos)
|
||||||
*prev = known;
|
*prev = known;
|
||||||
|
|
||||||
/* If KNOWN is in [BYTEPOS, NEXT) it is a better NEXT. */
|
/* If KNOWN is in [BYTEPOS, NEXT) it is a better NEXT. */
|
||||||
if (known.bytepos > bytepos
|
if (known.bytepos >= bytepos
|
||||||
&& known.bytepos < next->bytepos)
|
&& known.bytepos < next->bytepos)
|
||||||
*next = known;
|
*next = known;
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Improve the known bytepos bounds *PREV and *NEXT of buffer B using
|
/* Improve the known bytepos bounds *PREV and *NEXT of buffer B using
|
||||||
known positions in B. BYTEPOS is a byte position to convert to a
|
known positions in B. BYTEPOS is a byte position to convert to a
|
||||||
character position. If an exact match for BYTEPOS is found, return
|
character position. */
|
||||||
its charpos, otherwise return TEXT_INDEX_INVALID_POSITION. */
|
|
||||||
|
|
||||||
static ptrdiff_t
|
static void
|
||||||
narrow_bytepos_bounds (struct buffer *b, struct text_pos *prev,
|
narrow_bytepos_bounds (struct buffer *b, struct text_pos *prev,
|
||||||
struct text_pos *next, const ptrdiff_t bytepos)
|
struct text_pos *next, const ptrdiff_t bytepos)
|
||||||
{
|
{
|
||||||
const struct text_pos pt = pt_pos (b);
|
narrow_bytepos_bounds_1 (pt_pos (b), prev, next, bytepos);
|
||||||
if (narrow_bytepos_bounds_1 (pt, prev, next, bytepos))
|
narrow_bytepos_bounds_1 (gpt_pos (b), prev, next, bytepos);
|
||||||
return pt.charpos;
|
|
||||||
|
|
||||||
const struct text_pos gpt = gpt_pos (b);
|
|
||||||
if (narrow_bytepos_bounds_1 (gpt, prev, next, bytepos))
|
|
||||||
return gpt.charpos;
|
|
||||||
|
|
||||||
struct text_index *ti = b->text->index;
|
struct text_index *ti = b->text->index;
|
||||||
if (is_cache_valid (ti)
|
if (is_cache_valid (ti))
|
||||||
&& narrow_bytepos_bounds_1 (ti->cache, prev, next, bytepos))
|
narrow_bytepos_bounds_1 (ti->cache, prev, next, bytepos);
|
||||||
return ti->cache.charpos;
|
|
||||||
|
|
||||||
return TEXT_INDEX_INVALID_POSITION;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Improve the known charpos bounds *PREV and *NEXT if KNOWN is closer
|
/* Improve the known charpos bounds *PREV and *NEXT if KNOWN is closer
|
||||||
to CHARPOS. If KNOWN is an exact match for CHARPOS return true. */
|
to CHARPOS. */
|
||||||
|
|
||||||
static bool
|
static void
|
||||||
narrow_charpos_bounds_1 (const struct text_pos known, struct text_pos *prev,
|
narrow_charpos_bounds_1 (const struct text_pos known, struct text_pos *prev,
|
||||||
struct text_pos *next, const ptrdiff_t charpos)
|
struct text_pos *next, const ptrdiff_t charpos)
|
||||||
{
|
{
|
||||||
eassert (charpos >= prev->charpos && charpos <= next->charpos);
|
eassert (charpos >= prev->charpos && charpos <= next->charpos);
|
||||||
eassert (known.charpos != TEXT_INDEX_INVALID_POSITION);
|
eassert (known.charpos != TEXT_INDEX_INVALID_POSITION);
|
||||||
if (known.charpos == charpos)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
/* If KNOWN is in (PREV, CHARPOS] it is a better PREV. */
|
/* If KNOWN is in (PREV, CHARPOS] it is a better PREV. */
|
||||||
if (known.charpos < charpos
|
if (known.charpos <= charpos
|
||||||
&& known.charpos > prev->charpos)
|
&& known.charpos > prev->charpos)
|
||||||
*prev = known;
|
*prev = known;
|
||||||
|
|
||||||
/* If KNOWN is in [CHARPOS, NEXT) it is a better NEXT. */
|
/* If KNOWN is in [CHARPOS, NEXT) it is a better NEXT. */
|
||||||
if (known.charpos > charpos
|
if (known.charpos >= charpos
|
||||||
&& known.charpos < next->charpos)
|
&& known.charpos < next->charpos)
|
||||||
*next = known;
|
*next = known;
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Improve the known charpos bounds *PREV and *NEXT of buffer B using
|
/* Improve the known charpos bounds *PREV and *NEXT of buffer B using
|
||||||
known positions in B. CHARPOS is a character position to convert to a
|
known positions in B. CHARPOS is a character position to convert to a
|
||||||
byte position. If an exact match for CHARPOS is found, return
|
byte position. */
|
||||||
its bytepos, otherwise return TEXT_INDEX_INVALID_POSITION. */
|
|
||||||
|
|
||||||
static ptrdiff_t
|
static void
|
||||||
narrow_charpos_bounds (struct buffer *b, struct text_pos *prev,
|
narrow_charpos_bounds (struct buffer *b, struct text_pos *prev,
|
||||||
struct text_pos *next, const ptrdiff_t charpos)
|
struct text_pos *next, const ptrdiff_t charpos)
|
||||||
{
|
{
|
||||||
const struct text_pos pt = pt_pos (b);
|
narrow_charpos_bounds_1 (pt_pos (b), prev, next, charpos);
|
||||||
if (narrow_charpos_bounds_1 (pt, prev, next, charpos))
|
narrow_charpos_bounds_1 (gpt_pos (b), prev, next, charpos);
|
||||||
return pt.bytepos;
|
|
||||||
|
|
||||||
const struct text_pos gpt = gpt_pos (b);
|
|
||||||
if (narrow_charpos_bounds_1 (gpt, prev, next, charpos))
|
|
||||||
return gpt.bytepos;
|
|
||||||
|
|
||||||
struct text_index *ti = b->text->index;
|
struct text_index *ti = b->text->index;
|
||||||
if (is_cache_valid (ti)
|
if (is_cache_valid (ti))
|
||||||
&& narrow_charpos_bounds_1 (ti->cache, prev, next, charpos))
|
narrow_charpos_bounds_1 (ti->cache, prev, next, charpos);
|
||||||
return ti->cache.bytepos;
|
|
||||||
|
|
||||||
return TEXT_INDEX_INVALID_POSITION;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return the character position in buffer B corresponding to
|
/* Return the character position in buffer B corresponding to
|
||||||
|
|
@ -608,6 +583,7 @@ narrow_charpos_bounds (struct buffer *b, struct text_pos *prev,
|
||||||
ptrdiff_t
|
ptrdiff_t
|
||||||
buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
|
buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
|
||||||
{
|
{
|
||||||
|
/* FIXME: Can BYTEPOS ever be outside of BEGV_BYTE..ZV_BYTE? */
|
||||||
/* If this buffer has as many characters as bytes, each character must
|
/* If this buffer has as many characters as bytes, each character must
|
||||||
be one byte. This takes care of the case where
|
be one byte. This takes care of the case where
|
||||||
enable-multibyte-characters is nil. */
|
enable-multibyte-characters is nil. */
|
||||||
|
|
@ -615,30 +591,39 @@ buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
|
||||||
if (z.charpos == z.bytepos)
|
if (z.charpos == z.bytepos)
|
||||||
return bytepos;
|
return bytepos;
|
||||||
|
|
||||||
/* If BYTEPOS == Z_BYTE, and BYTEPOS is an interval boundary,
|
/* Begin with the interval (BEG, Z), and improve on that by taking known
|
||||||
then BYTEPOS does not have an index entry because we don't want
|
positions into account like PT, GPT and the cache. This might
|
||||||
extra entries for (Z, Z_BYTE). Changing that would be possible
|
already find the answer. */
|
||||||
but leads to more code than this if-statement, so it's probably
|
struct text_index *ti = ensure_has_index (b);
|
||||||
not worth it. */
|
struct text_pos prev = beg_pos (b);
|
||||||
if (bytepos == z.bytepos)
|
struct text_pos next = z;
|
||||||
return z.charpos;
|
|
||||||
|
narrow_bytepos_bounds (b, &prev, &next, bytepos);
|
||||||
|
|
||||||
|
/* Z_BYTE does not have an index entry because we don't want
|
||||||
|
extra entries for (Z, Z_BYTE), so short-circuit *before* looking
|
||||||
|
up the index. Changing that would be possible but leads to more
|
||||||
|
code than this if-statement, so it's probably not worth it. */
|
||||||
|
if (next.bytepos == bytepos)
|
||||||
|
return next.charpos;
|
||||||
|
|
||||||
ensure_bytepos_indexed (b, bytepos);
|
ensure_bytepos_indexed (b, bytepos);
|
||||||
|
|
||||||
struct text_index *ti = b->text->index;
|
|
||||||
const ptrdiff_t entry = index_bytepos_entry (ti, bytepos);
|
const ptrdiff_t entry = index_bytepos_entry (ti, bytepos);
|
||||||
struct text_pos prev = index_text_pos (ti, entry);
|
narrow_bytepos_bounds_1 (index_text_pos (ti, entry), &prev, &next, bytepos);
|
||||||
struct text_pos next = next_known_text_pos (b, entry);
|
narrow_bytepos_bounds_1 (next_known_text_pos (b, entry),
|
||||||
|
&prev, &next, bytepos);
|
||||||
|
|
||||||
ptrdiff_t charpos = narrow_bytepos_bounds (b, &prev, &next, bytepos);
|
if (next.charpos - prev.charpos == next.bytepos - prev.bytepos
|
||||||
if (charpos != TEXT_INDEX_INVALID_POSITION)
|
/* Beware: NEXT and PREV can be in the middle of multibyte chars! */
|
||||||
return charpos;
|
&& CHAR_HEAD_P (BUF_FETCH_BYTE (b, prev.bytepos)))
|
||||||
|
return prev.charpos + (bytepos - prev.bytepos); /* ASCII-only! */
|
||||||
|
|
||||||
/* Scan forward if the distance to the previous known position is
|
/* Scan forward if the distance to the previous known position is
|
||||||
smaller than the distance to the next known position. */
|
smaller than the distance to the next known position. */
|
||||||
if (bytepos - prev.bytepos < next.bytepos - bytepos)
|
ptrdiff_t charpos
|
||||||
charpos = charpos_forward_to_bytepos (b, prev, bytepos);
|
= (bytepos - prev.bytepos < next.bytepos - bytepos)
|
||||||
else
|
? charpos_forward_to_bytepos (b, prev, bytepos)
|
||||||
charpos = charpos_backward_to_bytepos (b, next, bytepos);
|
: charpos_backward_to_bytepos (b, next, bytepos);
|
||||||
|
|
||||||
cache (ti, charpos, bytepos);
|
cache (ti, charpos, bytepos);
|
||||||
return charpos;
|
return charpos;
|
||||||
|
|
@ -650,6 +635,7 @@ buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
|
||||||
ptrdiff_t
|
ptrdiff_t
|
||||||
buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
|
buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
|
||||||
{
|
{
|
||||||
|
/* FIXME: Can CHARPOS ever be outside of BEGV..ZV? */
|
||||||
/* If this buffer has as many characters as bytes, each character must
|
/* If this buffer has as many characters as bytes, each character must
|
||||||
be one byte. This takes care of the case where
|
be one byte. This takes care of the case where
|
||||||
enable-multibyte-characters is nil. */
|
enable-multibyte-characters is nil. */
|
||||||
|
|
@ -657,25 +643,24 @@ buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
|
||||||
if (z.charpos == z.bytepos)
|
if (z.charpos == z.bytepos)
|
||||||
return charpos;
|
return charpos;
|
||||||
|
|
||||||
if (charpos == z.charpos)
|
|
||||||
return z.bytepos;
|
|
||||||
ensure_charpos_indexed (b, charpos);
|
|
||||||
|
|
||||||
/* Begin with the interval (BEG, Z), and improve on that by taking known
|
/* Begin with the interval (BEG, Z), and improve on that by taking known
|
||||||
positions into account like PT, GPT and the cache. This might
|
positions into account like PT, GPT and the cache. This might
|
||||||
already find the bytepos. */
|
already find the answer. */
|
||||||
struct text_index *ti = ensure_has_index (b);
|
struct text_index *ti = ensure_has_index (b);
|
||||||
struct text_pos prev = beg_pos (b);
|
struct text_pos prev = beg_pos (b);
|
||||||
struct text_pos next = z;
|
struct text_pos next = z;
|
||||||
|
|
||||||
ptrdiff_t bytepos = narrow_charpos_bounds (b, &prev, &next, charpos);
|
narrow_charpos_bounds (b, &prev, &next, charpos);
|
||||||
if (bytepos != TEXT_INDEX_INVALID_POSITION)
|
|
||||||
return bytepos;
|
if (next.charpos - prev.charpos == next.bytepos - prev.bytepos)
|
||||||
|
return prev.bytepos + (charpos - prev.charpos); /* ASCII-only! */
|
||||||
|
else if (next.charpos == charpos)
|
||||||
|
return next.bytepos;
|
||||||
|
|
||||||
/* If one of the bounds is already good enough, avoid consulting
|
/* If one of the bounds is already good enough, avoid consulting
|
||||||
the index since that involves some overhead. */
|
the index since that involves some overhead. */
|
||||||
if (!is_close_enough_charpos (ti, charpos, prev)
|
if (!is_close_enough_charpos (charpos, prev)
|
||||||
&& !is_close_enough_charpos (ti, charpos, next))
|
&& !is_close_enough_charpos (charpos, next))
|
||||||
{
|
{
|
||||||
ensure_charpos_indexed (b, charpos);
|
ensure_charpos_indexed (b, charpos);
|
||||||
const ptrdiff_t entry = index_charpos_entry (ti, charpos);
|
const ptrdiff_t entry = index_charpos_entry (ti, charpos);
|
||||||
|
|
@ -683,19 +668,23 @@ buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
|
||||||
narrow_charpos_bounds_1 (index_prev, &prev, &next, charpos);
|
narrow_charpos_bounds_1 (index_prev, &prev, &next, charpos);
|
||||||
const struct text_pos index_next = next_known_text_pos (b, entry);
|
const struct text_pos index_next = next_known_text_pos (b, entry);
|
||||||
narrow_charpos_bounds_1 (index_next, &prev, &next, charpos);
|
narrow_charpos_bounds_1 (index_next, &prev, &next, charpos);
|
||||||
|
|
||||||
|
if (next.charpos - prev.charpos == next.bytepos - prev.bytepos
|
||||||
|
/* Beware: NEXT and PREV can be in the middle of multibyte chars! */
|
||||||
|
&& CHAR_HEAD_P (BUF_FETCH_BYTE (b, prev.bytepos))
|
||||||
|
&& CHAR_HEAD_P (BUF_FETCH_BYTE (b, next.bytepos - 1)))
|
||||||
|
return prev.bytepos + (charpos - prev.charpos); /* ASCII-only! */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Don't scan forward if CHARPOS is exactly on the previous known
|
/* Don't scan forward if CHARPOS is exactly on the previous known
|
||||||
position because the index bytepos can be in the middle of a
|
position because the index bytepos can be in the middle of a
|
||||||
character, which is found by scanning backwards. Otherwise, scan
|
character, which is found by scanning backwards. Otherwise, scan
|
||||||
forward if we believe that's less expensive. */
|
forward if we believe that's less expensive. */
|
||||||
if (charpos > prev.charpos
|
ptrdiff_t bytepos
|
||||||
|
= (charpos > prev.charpos
|
||||||
&& charpos - prev.charpos < next.charpos - charpos)
|
&& charpos - prev.charpos < next.charpos - charpos)
|
||||||
bytepos = bytepos_forward_to_charpos (b, prev, charpos);
|
? bytepos_forward_to_charpos (b, prev, charpos)
|
||||||
else
|
: bytepos_backward_to_charpos (b, next, charpos);
|
||||||
bytepos = bytepos_backward_to_charpos (b, next, charpos);
|
|
||||||
|
|
||||||
cache (ti, charpos, bytepos);
|
cache (ti, charpos, bytepos);
|
||||||
return bytepos;
|
return bytepos;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue