Initial revision

2026-03-06 05:52:32 -08:00 · 1995-03-17 00:46:57 +00:00 · 1995-03-17 00:46:57 +00:00 · b9c5136fd6
commit b9c5136fd6
parent 509ed182e7
2 changed files with 944 additions and 0 deletions
--- a/src/region-cache.c
+++ b/src/region-cache.c
@ -0,0 +1,833 @@
+/* Caching facts about regions of the buffer, for optimization.
+   Copyright (C) 1985, 1986, 1987, 1988, 1989, 1993
+	Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+
+#include <config.h>
+#include "lisp.h"
+#include "buffer.h"
+#include "region-cache.h"
+
+#include <stdio.h>
+
+
+/* Data structures.  */
+
+/* The region cache.
+
+   We want something that maps character positions in a buffer onto
+   values.  The representation should deal well with long runs of
+   characters with the same value.
+
+   The tricky part: the representation should be very cheap to
+   maintain in the presence of many insertions and deletions.  If the
+   overhead of maintaining the cache is too high, the speedups it
+   offers will be worthless.
+
+
+   We represent the region cache as a sorted array of struct
+   boundary's, each of which contains a buffer position and a value;
+   the value applies to all the characters after the buffer position,
+   until the position of the next boundary, or the end of the buffer.
+
+   The cache always has a boundary whose position is BUF_BEG, so
+   there's always a value associated with every character in the
+   buffer.  Since the cache is sorted, this is always the first
+   element of the cache.
+
+   To facilitate the insertion and deletion of boundaries in the
+   cache, the cache has a gap, just like Emacs's text buffers do.
+
+   To help boundary positions float along with insertions and
+   deletions, all boundary positions before the cache gap are stored
+   relative to BUF_BEG (buf) (thus they're >= 0), and all boundary
+   positions after the gap are stored relative to BUF_Z (buf) (thus
+   they're <= 0).  Look at BOUNDARY_POS to see this in action.  See
+   revalidate_region_cache to see how this helps.  */
+
+/* One entry in a region cache's sorted boundary array.  */
+struct boundary {
+  /* Offset of this boundary.  BOUNDARY_POS adds it to the cache's
+     buffer_beg for entries stored before the cache gap, and to the
+     cache's buffer_end for entries stored after the gap.  */
+  int pos;
+  /* Value that applies to the text from this boundary up to the next
+     one.  In this file know_region_cache stores 1 here, while
+     new_region_cache and revalidate_region_cache store 0.  */
+  int value;
+};
+
+/* The state of one region cache, as created by new_region_cache.  */
+struct region_cache {
+  /* A sorted array of locations where the known-ness of the buffer
+     changes.  */
+  struct boundary *boundaries;
+
+  /* boundaries[gap_start ... gap_start + gap_len - 1] is the gap.  */
+  int gap_start, gap_len;
+
+  /* The number of elements allocated to boundaries, not including the
+     gap.  In other words, the number of live boundaries; the array
+     itself has room for cache_len + gap_len elements.  */
+  int cache_len;
+
+  /* The areas that haven't changed since the last time we cleaned out
+     invalid entries from the cache.  These overlap when the buffer is
+     entirely unchanged.  Both are character counts: beg_unchanged is
+     measured from the start of the buffer, end_unchanged from its
+     end (see invalidate_region_cache).  */
+  int beg_unchanged, end_unchanged;
+
+  /* The first and last positions in the buffer.  Because boundaries
+     store their positions relative to the start (BEG) and end (Z) of
+     the buffer, knowing these positions allows us to accurately
+     interpret positions without having to pass the buffer structure
+     or its endpoints around all the time.
+
+     Yes, buffer_beg is always 1.  It's there for symmetry with
+     buffer_end and the BEG and BUF_BEG macros.  */
+  int buffer_beg, buffer_end;
+};
+
+/* Return the position of boundary i in cache c.
+   i is a logical index (0 <= i < c->cache_len) that ignores the gap.
+   The result is computed against c->buffer_beg / c->buffer_end, so it
+   is only as current as those two fields.  Both arguments are
+   evaluated more than once; pass expressions without side effects.  */
+#define BOUNDARY_POS(c, i) \
+  ((i) < (c)->gap_start \
+   ? (c)->buffer_beg + (c)->boundaries[(i)].pos \
+   : (c)->buffer_end + (c)->boundaries[(c)->gap_len + (i)].pos)
+
+/* Return the value for text after boundary i in cache c.
+   i is a logical index that ignores the gap.  Both arguments are
+   evaluated more than once; pass expressions without side effects.  */
+#define BOUNDARY_VALUE(c, i) \
+  ((i) < (c)->gap_start \
+   ? (c)->boundaries[(i)].value \
+   : (c)->boundaries[(c)->gap_len + (i)].value)
+
+/* Set the value for text after boundary i in cache c to v.
+   i is a logical index that ignores the gap.  c and i are evaluated
+   more than once; pass expressions without side effects.  The
+   expression yields the value assigned.  */
+#define SET_BOUNDARY_VALUE(c, i, v) \
+  ((i) < (c)->gap_start \
+   ? ((c)->boundaries[(i)].value = (v))\
+   : ((c)->boundaries[(c)->gap_len + (i)].value = (v)))
+
+
+/* Number of elements a new cache's boundary array is allocated with
+   (see new_region_cache), and the smallest gap size move_cache_gap
+   asks for whenever it has to enlarge the boundary array.  */
+#define NEW_CACHE_GAP (40)
+
+/* See invalidate_region_cache; if an invalidation would throw away
+   information about more than this many characters, call
+   revalidate_region_cache before doing the new invalidation, to
+   preserve that information, instead of throwing it away.  */
+#define PRESERVE_THRESHOLD (500)
+
+static void revalidate_region_cache ();
+
+
+/* Interface: Allocating, initializing, and disposing of region caches.  */
+
+/* Allocate and return a fresh region cache.  The new cache describes
+   an empty buffer (basis 1..1) and contains only the permanent
+   boundary at the buffer start, whose value is 0.  Release the
+   result with free_region_cache.  */
+struct region_cache *
+new_region_cache ()
+{
+  struct region_cache *cache
+    = (struct region_cache *) xmalloc (sizeof (*cache));
+  struct boundary *first;
+
+  /* Start out with room for NEW_CACHE_GAP boundaries.  */
+  cache->boundaries
+    = (struct boundary *) xmalloc (NEW_CACHE_GAP
+                                   * sizeof (*cache->boundaries));
+
+  /* Slot 0 is the boundary for the buffer start; it is stored
+     relative to buffer_beg and is never moved or deleted.  */
+  first = &cache->boundaries[0];
+  first->pos   = 0;
+  first->value = 0;
+
+  /* One live boundary; the gap is everything after it.  */
+  cache->cache_len = 1;
+  cache->gap_start = 1;
+  cache->gap_len   = NEW_CACHE_GAP - 1;
+
+  /* The basis is an empty buffer with no unchanged text recorded.  */
+  cache->buffer_beg = 1;
+  cache->buffer_end = 1;
+  cache->beg_unchanged = 0;
+  cache->end_unchanged = 0;
+
+  return cache;
+}
+
+/* Release cache C: first its boundary array, then the cache
+   structure itself.  C must not be used afterwards.  */
+void
+free_region_cache (c)
+     struct region_cache *c;
+{
+  struct boundary *entries = c->boundaries;
+
+  xfree (entries);
+  xfree (c);
+}
+
+
+/* Finding positions in the cache.  */
+
+/* Return the index of the last boundary in cache C whose position is
+   at or before POS; that is the boundary which decides the value of
+   the character at POS.
+
+   This is a binary search over the logical boundary indices, so it
+   takes logarithmic time in the number of cache entries.  It makes no
+   attempt to exploit locality of reference.
+
+   Aborts if the result fails its consistency check, which happens in
+   particular when POS lies before the first boundary.  */
+static int
+find_cache_boundary (c, pos)
+     struct region_cache *c;
+     int pos;
+{
+  int lo = 0;
+  int hi = c->cache_len;
+
+  /* Invariant: the answer lies in lo..hi-1.  */
+  while (hi - lo > 1)
+    {
+      /* lo < probe < hi here, so probe is a valid index.  */
+      int probe = (lo + hi) >> 1;
+
+      if (BOUNDARY_POS (c, probe) <= pos)
+        lo = probe;
+      else
+        hi = probe;
+    }
+
+  /* Sanity checks: boundary lo must not lie after POS...  */
+  if (pos < BOUNDARY_POS (c, lo))
+    abort ();
+
+  /* ...and its successor, if there is one, must lie after POS.  */
+  if (lo + 1 < c->cache_len
+      && BOUNDARY_POS (c, lo + 1) <= pos)
+    abort ();
+
+  return lo;
+}
+
+
+
+/* Moving the cache gap around, inserting, and deleting.  */
+
+
+/* Move the gap of cache C to index POS, and make sure it has space
+   for at least MIN_SIZE boundaries.
+
+   POS is a logical boundary index and must satisfy
+   0 < POS <= C->cache_len; any other value aborts.  The dummy
+   boundary at index 0 therefore always stays before the gap.
+
+   Boundaries that end up on the other side of the gap have their
+   stored offsets converted between start-relative and end-relative
+   form, using the cache's current buffer_beg and buffer_end.
+
+   If the gap is smaller than MIN_SIZE, the boundary array is
+   reallocated so that the gap holds max (MIN_SIZE, NEW_CACHE_GAP)
+   elements.  Callers must not keep pointers into C->boundaries
+   across a call.  */
+static void
+move_cache_gap (c, pos, min_size)
+     struct region_cache *c;
+     int pos;
+     int min_size;
+{
+  /* Copy these out of the cache and into registers.  */
+  int gap_start = c->gap_start;
+  int gap_len = c->gap_len;
+  int buffer_beg = c->buffer_beg;
+  int buffer_end = c->buffer_end;
+
+  if (pos < 0
+      || pos > c->cache_len)
+    abort ();
+
+  /* We mustn't ever try to put the gap before the dummy start
+     boundary.  That must always be start-relative.  */
+  if (pos == 0)
+    abort ();
+
+  /* Need we move the gap right?  */
+  while (gap_start < pos)
+    {
+      /* Copy one boundary from after to before the gap, and
+         convert its position to start-relative.  */
+      c->boundaries[gap_start].pos
+        = (buffer_end
+           + c->boundaries[gap_start + gap_len].pos
+           - buffer_beg);
+      c->boundaries[gap_start].value
+        = c->boundaries[gap_start + gap_len].value;
+      gap_start++;
+    }
+
+  /* To enlarge the gap, we need to re-allocate the boundary array, and
+     then shift the area after the gap to the new end.  Since the cost
+     is proportional to the amount of stuff after the gap, we do the
+     enlargement here, after a right shift but before a left shift,
+     when the portion after the gap is smallest.  */
+  if (gap_len < min_size)
+    {
+      int i;
+
+      /* Always make at least NEW_CACHE_GAP elements, as long as we're
+         expanding anyway.  */
+      if (min_size < NEW_CACHE_GAP)
+        min_size = NEW_CACHE_GAP;
+
+      c->boundaries =
+        (struct boundary *) xrealloc (c->boundaries,
+                                      ((min_size + c->cache_len)
+                                       * sizeof (*c->boundaries)));
+
+      /* The boundary with logical index i currently lives at
+         i + gap_len and must end up at i + min_size, so that the gap
+         really is min_size elements long and fills the array we just
+         allocated.  The destination is always above the source
+         (min_size > gap_len), so copying from the top down never
+         overwrites an element that has not been moved yet.  We do it
+         by hand because memmove isn't widely available.  */
+      for (i = c->cache_len - 1; i >= gap_start; i--)
+        c->boundaries[i + min_size] = c->boundaries[i + gap_len];
+
+      gap_len = min_size;
+    }
+
+  /* Need we move the gap left?  */
+  while (pos < gap_start)
+    {
+      gap_start--;
+
+      /* Copy one region from before to after the gap, and
+         convert its position to end-relative.  */
+      c->boundaries[gap_start + gap_len].pos
+        = c->boundaries[gap_start].pos + buffer_beg - buffer_end;
+      c->boundaries[gap_start + gap_len].value
+        = c->boundaries[gap_start].value;
+    }
+
+  /* Assign these back into the cache.  */
+  c->gap_start = gap_start;
+  c->gap_len  = gap_len;
+}
+
+
+/* Add a boundary to cache C at logical index INDEX, at absolute
+   buffer position POS and carrying VALUE.
+
+   Aborts unless 0 < INDEX <= C->cache_len, POS lies strictly between
+   the positions of the neighbouring boundaries, and VALUE differs
+   from the value of the preceding boundary.  */
+static void
+insert_cache_boundary (c, index, pos, value)
+     struct region_cache *c;
+     int index;
+     int pos, value;
+{
+  struct boundary *slot;
+
+  /* INDEX has to be a valid cache index, and nothing may ever be
+     inserted before the dummy first boundary.  */
+  if (index <= 0 || index > c->cache_len)
+    abort ();
+
+  /* Positions must remain strictly increasing: the new boundary goes
+     after its predecessor...  */
+  if (BOUNDARY_POS (c, index - 1) >= pos)
+    abort ();
+
+  /* ...and before its successor, when it has one.  */
+  if (index < c->cache_len
+      && BOUNDARY_POS (c, index) <= pos)
+    abort ();
+
+  /* A boundary that repeats its predecessor's value is pointless.
+     The successor is deliberately not checked: callers temporarily
+     create boundaries with the same value as the one that follows.  */
+  if (value == BOUNDARY_VALUE (c, index - 1))
+    abort ();
+
+  /* Open the gap at INDEX with room for one element.  This may
+     reallocate the array, so take the slot's address only now.  */
+  move_cache_gap (c, index, 1);
+
+  /* The first gap slot becomes the new boundary.  It ends up before
+     the gap, hence the start-relative offset.  */
+  slot = &c->boundaries[index];
+  slot->pos   = pos - c->buffer_beg;
+  slot->value = value;
+
+  c->cache_len++;
+  c->gap_start++;
+  c->gap_len--;
+}
+
+
+/* Remove from cache C every boundary whose logical index i satisfies
+   START <= i < END.
+
+   Aborts if the range is out of bounds, reversed, or would remove the
+   dummy boundary at index 0.  An empty range is a no-op.  */
+
+static void
+delete_cache_boundaries (c, start, end)
+     struct region_cache *c;
+     int start, end;
+{
+  int count = end - start;
+
+  /* The range must lie inside the cache and be in order.  */
+  if (start < 0 || end > c->cache_len || start > end)
+    abort ();
+
+  /* The dummy entry is off limits.  */
+  if (start == 0 && end >= 1)
+    abort ();
+
+  /* Nothing to delete, so don't disturb the gap.  */
+  if (count == 0)
+    return;
+
+  /* Deleting simply means letting the gap swallow the doomed
+     entries.  Choose whichever arrangement moves the gap least.  */
+  if (start >= c->gap_start)
+    {
+      /* Gap is at or before the range: bring it up to START and let
+         it grow forward over the range.  */
+      move_cache_gap (c, start, 0);
+    }
+  else if (end <= c->gap_start)
+    {
+      /* Gap is at or after the range: bring it down to END and let
+         it grow backward over the range.  */
+      move_cache_gap (c, end, 0);
+      c->gap_start -= count;
+    }
+  else
+    {
+      /* Gap is inside the range: it just widens on both sides.  */
+      c->gap_start = start;
+    }
+
+  c->gap_len   += count;
+  c->cache_len -= count;
+}
+     
+
+
+/* Set the value for a region.  */
+
+/* Set the value in cache C for the region START..END to VALUE.
+
+   START and END are absolute buffer positions, interpreted against
+   the cache's current basis (C->buffer_beg and C->buffer_end); the
+   region covers the characters from START up to, but not including,
+   END.  Aborts if START > END or if the region sticks out of
+   C->buffer_beg..C->buffer_end.  An empty region is a no-op.
+
+   Boundaries inside the region are deleted, and boundaries at its two
+   edges are added, updated or removed as needed, so that no boundary
+   is left establishing the same value as its predecessor.  */
+static void
+set_cache_region (c, start, end, value)
+     struct region_cache *c;
+     int start, end;
+     int value;
+{
+  if (start > end)
+    abort ();
+  if (start < c->buffer_beg
+      || end   > c->buffer_end)
+    abort ();
+
+  /* Eliminate this case; then we can assume that start and end-1 are
+     both the locations of real characters in the buffer.  */
+  if (start == end)
+    return;
+  
+  {
+    /* We need to make sure that there are no boundaries in the area
+       between start to end; the whole area will have the same value,
+       so those boundaries will not be necessary.
+       
+       Let start_ix be the cache index of the boundary governing the
+       first character of start..end, and let end_ix be the cache
+       index of the earliest boundary after the last character in
+       start..end.  (This tortured terminology is intended to answer
+       all the "< or <=?" sort of questions.)  */
+    int start_ix = find_cache_boundary (c, start);
+    int end_ix   = find_cache_boundary (c, end - 1) + 1;
+
+    /* We must remember the value established by the last boundary
+       before end; if that boundary's domain stretches beyond end,
+       we'll need to create a new boundary at end, and that boundary
+       must have that remembered value.  */
+    int value_at_end = BOUNDARY_VALUE (c, end_ix - 1);
+
+    /* Delete all boundaries strictly within start..end; this means
+       those whose indices are between start_ix (exclusive) and end_ix
+       (exclusive).  */
+    delete_cache_boundaries (c, start_ix + 1, end_ix);
+
+    /* Make sure we have the right value established going in to
+       start..end from the left, and no unnecessary boundaries.  */
+    if (BOUNDARY_POS (c, start_ix) == start)
+      {
+        /* Is this boundary necessary?  If no, remove it; if yes, set
+           its value.  */
+        if (start_ix > 0
+            && BOUNDARY_VALUE (c, start_ix - 1) == value)
+          {
+            delete_cache_boundaries (c, start_ix, start_ix + 1);
+            start_ix--;
+          }
+        else
+          SET_BOUNDARY_VALUE (c, start_ix, value);
+      }
+    else
+      {
+        /* Do we need to add a new boundary here?  */
+        if (BOUNDARY_VALUE (c, start_ix) != value)
+          {
+            insert_cache_boundary (c, start_ix + 1, start, value);
+            start_ix++;
+          }
+      }
+          
+    /* This is equivalent to letting end_ix float (like a buffer
+       marker does) with the insertions and deletions we may have
+       done.  At this point start_ix is the boundary governing
+       start..end, so the next index is the first one past the
+       region.  */
+    end_ix = start_ix + 1;
+
+    /* Make sure we have the correct value established as we leave
+       start..end to the right.  */
+    if (end == c->buffer_end)
+      /* There is no text after start..end; nothing to do.  */
+      ;
+    else if (end_ix >= c->cache_len
+             || end < BOUNDARY_POS (c, end_ix))
+      {
+        /* There is no boundary at end, but we may need one.  */
+        if (value_at_end != value)
+          insert_cache_boundary (c, end_ix, end, value_at_end);
+      }
+    else
+      {
+        /* There is a boundary at end; should it be there?  */
+        if (value == BOUNDARY_VALUE (c, end_ix))
+          delete_cache_boundaries (c, end_ix, end_ix + 1);
+      }
+  }
+}
+
+
+
+/* Interface: Invalidating the cache.  Private: Re-validating the cache.  */
+
+/* Indicate that a section of BUF has changed, to invalidate cache C.
+   HEAD is the number of chars unchanged at the beginning of the buffer.
+   TAIL is the number of chars unchanged at the end of the buffer.
+      NOTE: this is *not* the same as the ending position of modified
+      region.
+   (This way of specifying regions makes more sense than absolute
+   buffer positions in the presence of insertions and deletions; the
+   args to pass are the same before and after such an operation.)
+
+   This function only records the change by shrinking the cache's
+   unchanged head and tail; the cache entries themselves are repaired
+   later by revalidate_region_cache.  */
+void
+invalidate_region_cache (buf, c, head, tail)
+     struct buffer *buf;
+     struct region_cache *c;
+     int head, tail;
+{
+  /* Edges of the region being invalidated now, and of the modified
+     region already pending in the cache, all in BUF's current
+     coordinates.  */
+  int modified_beg = BUF_BEG (buf) + head;
+  int modified_end = BUF_Z (buf) - tail;
+  int pending_beg = BUF_BEG (buf) + c->beg_unchanged;
+  int pending_end = BUF_Z (buf) - c->end_unchanged;
+
+  /* If the new modification lies far away from the pending one,
+     merging the two would discard everything the cache knows about
+     the text in between.  When that text is more than
+     PRESERVE_THRESHOLD characters long, losing it may cost more than
+     revalidating now, so revalidate first and start a fresh modified
+     region.  */
+  if (pending_beg - modified_end > PRESERVE_THRESHOLD
+      || modified_beg - pending_end > PRESERVE_THRESHOLD)
+    revalidate_region_cache (buf, c);
+
+  /* Cut the unchanged head and tail back to cover this change too.  */
+  c->beg_unchanged = (head < c->beg_unchanged) ? head : c->beg_unchanged;
+  c->end_unchanged = (tail < c->end_unchanged) ? tail : c->end_unchanged;
+
+  /* From here on we know nothing about the text between the
+     unchanged head and the unchanged tail (the "modified region"),
+     not even its length:
+
+     - after deletions, the cache may hold boundaries that were
+       located in text which no longer exists;
+
+     - after insertions, boundaries in the modified region may have
+       wrong positions;
+
+     - even BOUNDARY_POS on boundaries in the unchanged tail may be
+       wrong, because c->buffer_end may be out of date.  (Boundaries
+       in the unchanged head stay correct, since c->buffer_beg never
+       changes.)
+
+     None of that is cleaned up here.  This function runs on every
+     buffer modification, so it has to stay cheap.  Instead,
+     revalidate_region_cache accounts for all the accumulated
+     modifications at once, and it is called before the cache is
+     consulted the next time.  */
+}
+
+
+/* Clean out any cache entries applying to the modified region, and
+   make the positions of the remaining entries accurate again.
+
+   BUF is the buffer that cache C describes; its current BUF_BEG and
+   BUF_Z become the cache's new basis.
+
+   After calling this function, the mess described in the comment in
+   invalidate_region_cache is cleaned up.
+
+   This function operates by simply throwing away everything it knows
+   about the modified region.  It doesn't care exactly which
+   insertions and deletions took place; it just tosses it all.
+
+   For example, if you insert a single character at the beginning of
+   the buffer, and a single character at the end of the buffer,
+   without calling this function in between the two insertions, then
+   the entire cache will be freed of useful information.  On the other
+   hand, if you do manage to call this function in between the two
+   insertions, then the modified regions will be small in both cases,
+   no information will be tossed, and the cache will know that it
+   doesn't have knowledge of the first and last characters any more.
+
+   Calling this function may be expensive; it does binary searches in
+   the cache, and causes cache gap motion.  */
+
+static void
+revalidate_region_cache (buf, c)
+     struct buffer *buf;
+     struct region_cache *c;
+{
+  /* The boundaries now in the cache are expressed relative to the
+     buffer_beg and buffer_end values stored in the cache.  Now,
+     buffer_beg and buffer_end may not be the same as BUF_BEG (buf)
+     and BUF_Z (buf), so we have two different "bases" to deal with
+     --- the cache's, and the buffer's.  */
+
+  /* If the entire buffer is still valid, don't waste time.  Yes, this
+     should be a >, not a >=; think about what beg_unchanged and
+     end_unchanged get set to when the only change has been an
+     insertion.  */
+  if (c->buffer_beg + c->beg_unchanged
+      > c->buffer_end - c->end_unchanged)
+    return;
+
+  /* If all the text we knew about as of the last cache revalidation
+     is still there, then all of the information in the cache is still
+     valid.  Because c->buffer_beg and c->buffer_end are out-of-date,
+     the modified region appears from the cache's point of view to be
+     a null region located someplace in the buffer.
+
+     Now, invalidating that empty string will have no actual effect on
+     the cache; instead, we need to update the cache's basis first
+     (which will give the modified region the same size in the cache
+     as it has in the buffer), and then invalidate the modified
+     region. */
+  if (c->buffer_beg + c->beg_unchanged 
+      == c->buffer_end - c->end_unchanged)
+    {
+      /* Move the gap so that all the boundaries in the unchanged head
+         are expressed beg-relative, and all the boundaries in the
+         unchanged tail are expressed end-relative.  That done, we can
+         plug in the new buffer beg and end, and all the positions
+         will be accurate.
+
+         The boundary which has jurisdiction over the modified region
+         should be left before the gap.  */
+      move_cache_gap (c,
+                      (find_cache_boundary (c, (c->buffer_beg
+                                                + c->beg_unchanged))
+                       + 1),
+                      0);
+
+      c->buffer_beg = BUF_BEG (buf);
+      c->buffer_end = BUF_Z   (buf);
+
+      /* Now that the cache's basis has been changed, the modified
+         region actually takes up some space in the cache, so we can
+         invalidate it.  */
+      set_cache_region (c,
+                        c->buffer_beg + c->beg_unchanged,
+                        c->buffer_end - c->end_unchanged,
+                        0);
+    }
+
+  /* Otherwise, there is a non-empty region in the cache which
+     corresponds to the modified region of the buffer.  */
+  else
+    {
+      int modified_ix;
+
+      /* These positions are correct, relative to both the cache basis
+         and the buffer basis.  */
+      set_cache_region (c,
+                        c->buffer_beg + c->beg_unchanged,
+                        c->buffer_end - c->end_unchanged,
+                        0);
+
+      /* Now the cache contains only boundaries that are in the
+         unchanged head and tail; we've disposed of any boundaries
+         whose positions we can't be sure of given the information
+         we've saved.
+
+         If we put the cache gap between the unchanged head and the
+         unchanged tail, we can adjust all the boundary positions at
+         once, simply by setting buffer_beg and buffer_end.
+
+         The boundary which has jurisdiction over the modified region
+         should be left before the gap.  */
+      modified_ix =
+        find_cache_boundary (c, (c->buffer_beg + c->beg_unchanged)) + 1;
+      move_cache_gap (c, modified_ix, 0);
+
+      c->buffer_beg = BUF_BEG (buf);
+      c->buffer_end = BUF_Z   (buf);
+
+      /* Now, we may have shrunk the buffer when we changed the basis,
+         and brought the boundaries we created for the start and end
+         of the modified region together, giving them the same
+         position.  If that's the case, we should collapse them into
+         one boundary.  Or we may even delete them both, if the values
+         before and after them are the same.  */
+      if (modified_ix < c->cache_len
+          && (BOUNDARY_POS (c, modified_ix - 1)
+              == BOUNDARY_POS (c, modified_ix)))
+        {
+          int value_after = BOUNDARY_VALUE (c, modified_ix);
+
+          /* Should we remove both of the boundaries?  Yes, if the
+             latter boundary is now establishing the same value that
+             the former boundary's predecessor does.  */
+          if (modified_ix - 1 > 0
+              && value_after == BOUNDARY_VALUE (c, modified_ix - 2))
+            delete_cache_boundaries (c, modified_ix - 1, modified_ix + 1);
+          else
+            {
+              /* We do need a boundary here; collapse the two
+                 boundaries into one.  */
+              SET_BOUNDARY_VALUE (c, modified_ix - 1, value_after);
+              delete_cache_boundaries (c, modified_ix, modified_ix + 1);
+            }
+        }
+    }
+
+  /* Now the entire cache is valid.  Setting both counts to the buffer
+     length makes the unchanged head and tail each cover the whole
+     buffer.  */
+  c->beg_unchanged
+    = c->end_unchanged
+      = c->buffer_end - c->buffer_beg;
+}
+
+
+/* Interface: Adding information to the cache.  */
+
+/* Assert that the region of BUF between START and END (absolute
+   buffer positions) is "known," for the purposes of cache C (e.g.
+   "has no newlines", in the case of the line cache).
+
+   Pending invalidations are processed first, so START and END are
+   interpreted against BUF's current contents.  set_cache_region
+   aborts if START > END or if the region does not lie within the
+   buffer.  */
+void
+know_region_cache (buf, c, start, end)
+     struct buffer *buf;
+     struct region_cache *c;
+     int start, end;
+{
+  /* Bring the cache's basis up to date before touching its entries.  */
+  revalidate_region_cache (buf, c);
+
+  /* 1 is the value that means "known".  */
+  set_cache_region (c, start, end, 1);
+}
+
+
+/* Interface: using the cache.  */
+
+/* Return true if the text immediately after POS in BUF is known, for
+   the purposes of cache C.  If NEXT is non-zero, set *NEXT to the
+   nearest position after POS where the knownness changes, or to the
+   end of BUF if it does not change again.
+
+   Text at or beyond the end of BUF counts as unknown: in that case
+   the result is 0 and *NEXT, if requested, is the end of BUF.  */
+int
+region_cache_forward (buf, c, pos, next)
+     struct buffer *buf;
+     struct region_cache *c;
+     int pos;
+     int *next;
+{
+  int ix, known;
+
+  revalidate_region_cache (buf, c);
+
+  ix = find_cache_boundary (c, pos);
+  known = BOUNDARY_VALUE (c, ix);
+
+  /* Beyond the end of the buffer is unknown, by definition.  */
+  if (pos >= BUF_Z (buf))
+    {
+      if (next)
+        *next = BUF_Z (buf);
+      return 0;
+    }
+
+  if (next)
+    {
+      /* Skip the following boundaries that carry the same value; the
+         first one that differs marks the change.  */
+      int scan = ix + 1;
+
+      while (scan < c->cache_len
+             && BOUNDARY_VALUE (c, scan) == known)
+        scan++;
+
+      *next = (scan < c->cache_len
+               ? BOUNDARY_POS (c, scan)
+               : BUF_Z (buf));
+    }
+
+  return known;
+}
+
+/* Return true if the text immediately before POS in BUF is known, for
+   the purposes of cache C.  If NEXT is non-zero, set *NEXT to the
+   nearest position before POS where the knownness changes, or to the
+   beginning of BUF if it does not change.
+
+   Text before the beginning of BUF counts as unknown: when POS is at
+   or before the beginning, the result is 0 and *NEXT, if requested,
+   is the beginning of BUF.  */
+int region_cache_backward (buf, c, pos, next)
+     struct buffer *buf;
+     struct region_cache *c;
+     int pos;
+     int *next;
+{
+  int ix, known;
+
+  revalidate_region_cache (buf, c);
+
+  /* Before the beginning of the buffer is unknown, by
+     definition. */
+  if (pos <= BUF_BEG (buf))
+    {
+      if (next)
+        *next = BUF_BEG (buf);
+      return 0;
+    }
+
+  /* The character before POS is the one at POS - 1.  */
+  ix = find_cache_boundary (c, pos - 1);
+  known = BOUNDARY_VALUE (c, ix);
+
+  if (next)
+    {
+      /* Walk back over the preceding boundaries that carry the same
+         value.  SCAN ends up at the earliest boundary of the run, and
+         that is where the value last changed.  */
+      int scan = ix;
+
+      while (scan > 0
+             && BOUNDARY_VALUE (c, scan - 1) == known)
+        scan--;
+
+      *next = (scan > 0
+               ? BOUNDARY_POS (c, scan)
+               : BUF_BEG (buf));
+    }
+
+  return known;
+}
+
+
+/* Debugging: pretty-print cache C to the standard error output.
+
+   The first line shows the cache's basis and its modified region.
+   Each following line describes one boundary as "POS : VALUE",
+   preceded by two marker characters: 'v' if the boundary lies before
+   the start of the modified region and '-' if it is exactly there;
+   then '^' if it lies after the end of the modified region and '-'
+   if it is exactly there.  A blank stands for neither.  */
+
+void
+pp_cache (c)
+     struct region_cache *c;
+{
+  int modified_beg = c->buffer_beg + c->beg_unchanged;
+  int modified_end = c->buffer_end - c->end_unchanged;
+  int ix;
+
+  fprintf (stderr,
+           "basis: %d..%d    modified: %d..%d\n",
+           c->buffer_beg, c->buffer_end,
+           modified_beg, modified_end);
+
+  for (ix = 0; ix < c->cache_len; ix++)
+    {
+      int where = BOUNDARY_POS (c, ix);
+      char head_mark = ' ';
+      char tail_mark = ' ';
+
+      if (where < modified_beg)
+        head_mark = 'v';
+      else if (where == modified_beg)
+        head_mark = '-';
+
+      if (where > modified_end)
+        tail_mark = '^';
+      else if (where == modified_end)
+        tail_mark = '-';
+
+      putc (head_mark, stderr);
+      putc (tail_mark, stderr);
+      fprintf (stderr, "%d : %d\n", where, BOUNDARY_VALUE (c, ix));
+    }
+}
--- a/src/region-cache.h
+++ b/src/region-cache.h
@ -0,0 +1,111 @@
+/* Header file: Caching facts about regions of the buffer, for optimization.
+   Copyright (C) 1985, 1986, 1993 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+
+/* This code was written by Jim Blandy <jimb@cs.oberlin.edu> to help
+   GNU Emacs better support the gene editor written for the University
+   of Illinois at Urbana-Champaign's Ribosome Database Project (RDP).
+
+   Emacs implements line operations (finding the beginning/end of the
+   line, vertical motion, all the redisplay stuff) by searching for
+   newlines in the buffer.  Usually, this is a good design; it's very
+   clean to just represent the buffer as an unstructured string of
+   characters, and the lines in most files are very short (less than
+   eighty characters), meaning that scanning usually costs about the
+   same as the overhead of maintaining some more complicated data
+   structure.
+
+   However, some applications, like gene editing, make use of very
+   long lines --- on the order of tens of kilobytes.  In such cases,
+   it may well be worthwhile to try to avoid scanning, because the
+   scans have become two orders of magnitude more expensive.  It would
+   be nice if this speedup could preserve the simplicity of the
+   existing data structure, and disturb as little of the existing code
+   as possible.
+
+   So here's the tack.  We add some caching to the scan_buffer
+   function, so that when it searches for a newline, it notes that the
+   region between the start and end of the search contained no
+   newlines; then, the next time around, it consults this cache to see
+   if there are regions of text it can skip over completely.  The
+   buffer modification primitives invalidate this cache.
+
+   (Note: Since the redisplay code needs similar information on
+   modified regions of the buffer, we can use the code that helps out
+   redisplay as a guide to where we need to add our own code to
+   invalidate our cache.  prepare_to_modify_buffer seems to be the
+   central spot.)
+
+   Note that the cache code itself never mentions newlines
+   specifically, so if you wanted to cache other properties of regions
+   of the buffer, you could use this code pretty much unchanged.  So
+   this cache really holds "known/unknown" information --- "I know
+   this region has property P" vs. "I don't know if this region has
+   property P or not."  */
+
+
+/* Allocate, initialize and return a new, empty region cache.
+   Release it with free_region_cache when it is no longer needed.  */
+struct region_cache *new_region_cache ( /* void */ );
+
+/* Free a region cache, including its array of boundaries.  The cache
+   must not be used afterwards.  */
+void free_region_cache ( /* struct region_cache * */ );
+
+/* Assert that the region of BUF between START and END (absolute
+   buffer positions) is "known," for the purposes of CACHE (e.g. "has
+   no newlines", in the case of the line cache).
+   Changes reported earlier through invalidate_region_cache are
+   processed first.  START must not be greater than END, and both
+   must lie within BUF; otherwise the call aborts.  */
+extern void know_region_cache ( /* struct buffer *BUF,
+                                   struct region_cache *CACHE,
+                                   int START, END */ );
+
+/* Indicate that a section of BUF has changed, to invalidate CACHE.
+   HEAD is the number of chars unchanged at the beginning of the buffer.
+   TAIL is the number of chars unchanged at the end of the buffer.
+      NOTE: this is *not* the same as the ending position of modified
+      region.
+   (This way of specifying regions makes more sense than absolute
+   buffer positions in the presence of insertions and deletions; the
+   args to pass are the same before and after such an operation.)
+   The call normally just records the change; the cache entries are
+   cleaned up later, before the cache is next consulted or added
+   to.  */
+extern void invalidate_region_cache ( /* struct buffer *BUF,
+                                       struct region_cache *CACHE,
+                                       int HEAD, TAIL */ );
+
+/* The scanning functions. 
+
+   Basically, if you're scanning forward/backward from position POS,
+   and region_cache_forward/backward returns true, you can skip all
+   the text between POS and *NEXT.  And if the function returns false,
+   you should examine all the text from POS to *NEXT, and call
+   know_region_cache depending on what you find there; this way, you
+   might be able to avoid scanning it again.  */
+
+/* Return true if the text immediately after POS in BUF is known, for
+   the purposes of CACHE.  If NEXT is non-zero, set *NEXT to the nearest 
+   position after POS where the knownness changes, or to the end of
+   BUF if it does not change again.  If POS is at or beyond the end
+   of BUF, return false and set *NEXT to the end of BUF.  */
+extern int region_cache_forward ( /* struct buffer *BUF,
+                                     struct region_cache *CACHE,
+                                     int POS,
+                                     int *NEXT */ );
+
+/* Return true if the text immediately before POS in BUF is known, for
+   the purposes of CACHE.  If NEXT is non-zero, set *NEXT to the nearest
+   position before POS where the knownness changes, or to the
+   beginning of BUF if it does not change.  If POS is at or before
+   the beginning of BUF, return false and set *NEXT to the beginning
+   of BUF.  */
+extern int region_cache_backward ( /* struct buffer *BUF,
+                                      struct region_cache *CACHE,
+                                      int POS,
+                                      int *NEXT */ );