mirror of
git://git.sv.gnu.org/emacs.git
synced 2026-01-06 20:00:46 -08:00
(analyse_first): For eight-bit-control chars, mark both the char's value and its leading byte in the fastmap.
(re_search_2): When fast-scanning without translation, be careful to check that we only match the leading byte of a multibyte char.
This commit is contained in:
parent
3ffcda5471
commit
4560a582d6
2 changed files with 54 additions and 9 deletions
|
|
@@ -1,3 +1,14 @@
|
|||
2006-09-22 Stefan Monnier <monnier@iro.umontreal.ca>
|
||||
|
||||
* regex.c (analyse_first): For eight-bit-control chars, mark both the
|
||||
char's value and its leading byte in the fastmap.
|
||||
(re_search_2): When fast-scanning without translation, be careful to
|
||||
check that we only match the leading byte of a multibyte char.
|
||||
|
||||
* charset.h (PREV_CHAR_BOUNDARY): Make it work from within a char's
|
||||
byte sequence.
|
||||
(AT_CHAR_BOUNDARY): New macro.
|
||||
|
||||
2006-09-22 Kenichi Handa <handa@m17n.org>
|
||||
|
||||
* fns.c (optimize_sub_char_table): Don't optimize a sub-char-table
|
||||
|
|
@@ -271,7 +282,7 @@
|
|||
|
||||
2006-08-27 Martin Rudalics <rudalics@gmx.at>
|
||||
|
||||
* xdisp.c (mouse_autoselect_window): Removed.
|
||||
* xdisp.c (mouse_autoselect_window): Remove.
|
||||
(Vmouse_autoselect_window): New variable. DEFVAR_LISP it.
|
||||
|
||||
* dispextern.h (mouse_autoselect_window): Remove extern.
|
||||
|
|
|
|||
50
src/regex.c
50
src/regex.c
|
|
@@ -3877,11 +3877,13 @@ analyse_first (p, pend, fastmap, multibyte)
|
|||
if (fastmap)
|
||||
{
|
||||
int c = RE_STRING_CHAR (p + 1, pend - p);
|
||||
|
||||
/* When fast-scanning, the fastmap can be indexed either with
|
||||
a char (smaller than 256) or with the first byte of
|
||||
a char's byte sequence. So we have to conservatively add
|
||||
both to the table. */
|
||||
if (SINGLE_BYTE_CHAR_P (c))
|
||||
fastmap[c] = 1;
|
||||
else
|
||||
fastmap[p[1]] = 1;
|
||||
fastmap[p[1]] = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
@@ -3899,6 +3901,10 @@ analyse_first (p, pend, fastmap, multibyte)
|
|||
So any that are not listed in the charset
|
||||
are possible matches, even in multibyte buffers. */
|
||||
if (!fastmap) break;
|
||||
/* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
|
||||
because it will automatically be set when needed by virtue of
|
||||
being larger than the highest char of its charset (0xbf) but
|
||||
smaller than (1<<BYTEWIDTH). */
|
||||
for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
|
||||
j < (1 << BYTEWIDTH); j++)
|
||||
fastmap[j] = 1;
|
||||
|
|
@@ -3909,7 +3915,13 @@ analyse_first (p, pend, fastmap, multibyte)
|
|||
for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
|
||||
j >= 0; j--)
|
||||
if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
|
||||
fastmap[j] = 1;
|
||||
{
|
||||
fastmap[j] = 1;
|
||||
#ifdef emacs
|
||||
if (j >= 0x80 && j < 0xa0)
|
||||
fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
if ((not && multibyte)
|
||||
/* Any character set can possibly contain a character
|
||||
|
|
@@ -4352,11 +4364,33 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
|
|||
}
|
||||
}
|
||||
else
|
||||
while (range > lim && !fastmap[*d])
|
||||
do
|
||||
{
|
||||
d++;
|
||||
range--;
|
||||
}
|
||||
re_char *d_start = d;
|
||||
while (range > lim && !fastmap[*d])
|
||||
{
|
||||
d++;
|
||||
range--;
|
||||
}
|
||||
#ifdef emacs
|
||||
if (multibyte && range > lim)
|
||||
{
|
||||
/* Check that we are at the beginning of a char. */
|
||||
int at_boundary;
|
||||
AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
|
||||
if (at_boundary)
|
||||
break;
|
||||
else
|
||||
{ /* We have matched an internal byte of a char
|
||||
rather than the leading byte, so it's a false
|
||||
positive: we should keep scanning. */
|
||||
d++; range--;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
break;
|
||||
} while (1);
|
||||
|
||||
startpos += irange - range;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue