mirror of
git://git.sv.gnu.org/emacs.git
synced 2025-12-28 08:11:05 -08:00
New file.
This commit is contained in:
parent
d2bc5a70e9
commit
0168c3d809
2 changed files with 1447 additions and 0 deletions
917
src/character.c
Normal file
917
src/character.c
Normal file
|
|
@ -0,0 +1,917 @@
|
|||
/* Basic character support.
|
||||
Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
|
||||
Licensed to the Free Software Foundation.
|
||||
Copyright (C) 2001 Free Software Foundation, Inc.
|
||||
Copyright (C) 2001, 2002
|
||||
National Institute of Advanced Industrial Science and Technology (AIST)
|
||||
Registration Number H13PRO009
|
||||
|
||||
This file is part of GNU Emacs.
|
||||
|
||||
GNU Emacs is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Emacs is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Emacs; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* At first, see the document in `character.h' to understand the code
|
||||
in this file. */
|
||||
|
||||
#ifdef emacs
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef emacs
|
||||
|
||||
#include <sys/types.h>
|
||||
#include "lisp.h"
|
||||
#include "character.h"
|
||||
#include "buffer.h"
|
||||
#include "charset.h"
|
||||
#include "composite.h"
|
||||
#include "disptab.h"
|
||||
|
||||
#else /* not emacs */
|
||||
|
||||
#include "mulelib.h"
|
||||
|
||||
#endif /* emacs */
|
||||
|
||||
/* The symbol `characterp' (set up by DEFSYM in syms_of_character).  */
Lisp_Object Qcharacterp;

/* Vector of every translation table defined so far.  The ID of a
   translation table is its index in this vector.  */
Lisp_Object Vtranslation_table_vector;

/* A char-table for characters which may invoke auto-filling.  */
Lisp_Object Vauto_fill_chars;

/* The symbol `auto-fill-chars' (set up by DEFSYM in
   syms_of_character).  */
Lisp_Object Qauto_fill_chars;

/* Consulted through MAYBE_UNIFY_CHAR; initialized to nil in
   syms_of_character.  */
Lisp_Object Vchar_unify_table;

/* A char-table.  An element is non-nil iff the corresponding
   character has a printable glyph.  */
Lisp_Object Vprintable_chars;

/* A char-table.  An element is the column width of the corresponding
   character.  */
Lisp_Object Vchar_width_table;

/* A char-table.  An element is a symbol indicating the direction
   property of the corresponding character.  */
Lisp_Object Vchar_direction_table;

/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
unsigned char *_fetch_multibyte_char_p;
int _fetch_multibyte_char_len;
|
||||
|
||||
|
||||
|
||||
int
|
||||
char_string_with_unification (c, p, advanced)
|
||||
int c;
|
||||
unsigned char *p, **advanced;
|
||||
{
|
||||
int bytes;
|
||||
|
||||
MAYBE_UNIFY_CHAR (c);
|
||||
|
||||
if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
|
||||
{
|
||||
bytes = CHAR_STRING (c, p);
|
||||
}
|
||||
else if (c <= MAX_4_BYTE_CHAR)
|
||||
{
|
||||
p[0] = (0xF0 | (c >> 18));
|
||||
p[1] = (0x80 | ((c >> 12) & 0x3F));
|
||||
p[2] = (0x80 | ((c >> 6) & 0x3F));
|
||||
p[3] = (0x80 | (c & 0x3F));
|
||||
bytes = 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
p[0] = 0xF8;
|
||||
p[1] = (0x80 | ((c >> 18) & 0x0F));
|
||||
p[2] = (0x80 | ((c >> 12) & 0x3F));
|
||||
p[3] = (0x80 | ((c >> 6) & 0x3F));
|
||||
p[4] = (0x80 | (c & 0x3F));
|
||||
bytes = 5;
|
||||
}
|
||||
if (advanced)
|
||||
*advanced = p + bytes;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
|
||||
/* Decode the multibyte form starting at P and return the character
   code after passing it through MAYBE_UNIFY_CHAR.  Forms of up to
   three bytes are decoded by STRING_CHAR_ADVANCE; 4-byte and 5-byte
   forms are decoded here.  If LEN is non-null, *LEN is set to the
   number of bytes consumed.  If ADVANCED is non-null, *ADVANCED is
   set to the address just past the decoded form.  */

int
string_char_with_unification (p, advanced, len)
     unsigned char *p, **advanced;
     int *len;
{
  /* NOTE(review): `unified' is not referenced directly in this
     function; kept in case MAYBE_UNIFY_CHAR expects it -- confirm.  */
  int c, unified;
  unsigned char *next = p;

  if (! (*next & 0x80) || ! (*next & 0x20) || ! (*next & 0x10))
    {
      c = STRING_CHAR_ADVANCE (next);
    }
  else if (*next & 0x08)
    {
      /* 5-byte form: the leading byte carries no code bits.  */
      c = (((next[1] & 0x3F) << 18)
	   | ((next[2] & 0x3F) << 12)
	   | ((next[3] & 0x3F) << 6)
	   | (next[4] & 0x3F));
      next += 5;
    }
  else
    {
      /* 4-byte form.  */
      c = (((next[0] & 0xF) << 18)
	   | ((next[1] & 0x3F) << 12)
	   | ((next[2] & 0x3F) << 6)
	   | (next[3] & 0x3F));
      next += 4;
    }

  MAYBE_UNIFY_CHAR (c);

  if (len)
    *len = next - p;
  if (advanced)
    *advanced = next;
  return c;
}
|
||||
|
||||
|
||||
/* Translate character C by translation table TABLE. If C is
|
||||
negative, translate a character specified by CHARSET and CODE. If
|
||||
no translation is found in TABLE, return the untranslated
|
||||
character. */
|
||||
|
||||
int
|
||||
translate_char (table, c)
|
||||
Lisp_Object table;
|
||||
int c;
|
||||
{
|
||||
Lisp_Object ch;
|
||||
|
||||
if (! CHAR_TABLE_P (table))
|
||||
return c;
|
||||
ch = CHAR_TABLE_REF (table, c);
|
||||
if (! CHARACTERP (ch))
|
||||
return c;
|
||||
return XINT (ch);
|
||||
}
|
||||
|
||||
/* Convert the unibyte character C to the corresponding multibyte
|
||||
character based on the current value of charset_primary. If C
|
||||
can't be converted, return C. */
|
||||
|
||||
int
|
||||
unibyte_char_to_multibyte (c)
|
||||
int c;
|
||||
{
|
||||
struct charset *charset = CHARSET_FROM_ID (charset_primary);
|
||||
int c1 = DECODE_CHAR (charset, c);
|
||||
|
||||
return ((c1 >= 0) ? c1 : c);
|
||||
}
|
||||
|
||||
|
||||
/* Convert the multibyte character C to unibyte 8-bit character based
|
||||
on the current value of charset_primary. If dimension of
|
||||
charset_primary is more than one, return (C & 0xFF).
|
||||
|
||||
The argument REV_TBL is now ignored. It will be removed in the
|
||||
future. */
|
||||
|
||||
int
|
||||
multibyte_char_to_unibyte (c, rev_tbl)
|
||||
int c;
|
||||
Lisp_Object rev_tbl;
|
||||
{
|
||||
struct charset *charset = CHARSET_FROM_ID (charset_primary);
|
||||
unsigned c1 = ENCODE_CHAR (charset, c);
|
||||
|
||||
return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
|
||||
}
|
||||
|
||||
|
||||
DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
|
||||
doc: /* Return non-nil if OBJECT is a character. */)
|
||||
(object, ignore)
|
||||
Lisp_Object object, ignore;
|
||||
{
|
||||
return (CHARACTERP (object) ? Qt : Qnil);
|
||||
}
|
||||
|
||||
DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
|
||||
doc: /* Return the character of the maximum code. */)
|
||||
()
|
||||
{
|
||||
return make_number (MAX_CHAR);
|
||||
}
|
||||
|
||||
DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
|
||||
Sunibyte_char_to_multibyte, 1, 1, 0,
|
||||
doc: /* Convert the unibyte character CH to multibyte character.
|
||||
The multibyte character is a result of decoding CH by
|
||||
the current primary charset (value of `charset-primary'). */)
|
||||
(ch)
|
||||
Lisp_Object ch;
|
||||
{
|
||||
int c;
|
||||
struct charset *charset;
|
||||
|
||||
CHECK_CHARACTER (ch);
|
||||
c = XFASTINT (ch);
|
||||
if (c >= 0400)
|
||||
error ("Invalid unibyte character: %d", c);
|
||||
charset = CHARSET_FROM_ID (charset_primary);
|
||||
c = DECODE_CHAR (charset, c);
|
||||
if (c < 0)
|
||||
error ("Can't convert to multibyte character: %d", XINT (ch));
|
||||
return make_number (c);
|
||||
}
|
||||
|
||||
DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
|
||||
Smultibyte_char_to_unibyte, 1, 1, 0,
|
||||
doc: /* Convert the multibyte character CH to unibyte character.\n\
|
||||
The unibyte character is a result of encoding CH by
|
||||
the current primary charset (value of `charset-primary'). */)
|
||||
(ch)
|
||||
Lisp_Object ch;
|
||||
{
|
||||
int c;
|
||||
unsigned code;
|
||||
struct charset *charset;
|
||||
|
||||
CHECK_CHARACTER (ch);
|
||||
c = XFASTINT (ch);
|
||||
charset = CHARSET_FROM_ID (charset_primary);
|
||||
code = ENCODE_CHAR (charset, c);
|
||||
if (code < CHARSET_MIN_CODE (charset)
|
||||
|| code > CHARSET_MAX_CODE (charset))
|
||||
error ("Can't convert to unibyte character: %d", XINT (ch));
|
||||
return make_number (code);
|
||||
}
|
||||
|
||||
DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
|
||||
doc: /* Return 1 regardless of the argument CHAR.
|
||||
This is now an obsolete function. We keep it just for backward compatibility. */)
|
||||
(ch)
|
||||
Lisp_Object ch;
|
||||
{
|
||||
CHECK_CHARACTER (ch);
|
||||
return make_number (1);
|
||||
}
|
||||
|
||||
DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
|
||||
doc: /* Return width of CHAR when displayed in the current buffer.
|
||||
The width is measured by how many columns it occupies on the screen.
|
||||
Tab is taken to occupy `tab-width' columns. */)
|
||||
(ch)
|
||||
Lisp_Object ch;
|
||||
{
|
||||
Lisp_Object disp;
|
||||
int c, width;
|
||||
struct Lisp_Char_Table *dp = buffer_display_table ();
|
||||
|
||||
CHECK_CHARACTER (ch);
|
||||
c = XINT (ch);
|
||||
|
||||
/* Get the way the display table would display it. */
|
||||
disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
|
||||
|
||||
if (VECTORP (disp))
|
||||
width = ASIZE (disp);
|
||||
else
|
||||
width = CHAR_WIDTH (c);
|
||||
|
||||
return make_number (width);
|
||||
}
|
||||
|
||||
/* Return the width of the LEN bytes at STR when displayed in the
   current buffer, measured in screen columns.  This is
   c_string_width with no precision limit.  */

int
strwidth (str, len)
     unsigned char *str;
     int len;
{
  int columns = c_string_width (str, len, -1, NULL, NULL);

  return columns;
}
|
||||
|
||||
/* Return width of string STR of length LEN when displayed in the
|
||||
current buffer. The width is measured by how many columns it
|
||||
occupies on the screen. If PRECISION > 0, return the width of
|
||||
longest substring that doesn't exceed PRECISION, and set number of
|
||||
characters and bytes of the substring in *NCHARS and *NBYTES
|
||||
respectively. */
|
||||
|
||||
c_string_width (str, len, precision, nchars, nbytes)
|
||||
unsigned char *str;
|
||||
int precision, *nchars, *nbytes;
|
||||
{
|
||||
int i = 0, i_byte = 0;
|
||||
int width = 0;
|
||||
struct Lisp_Char_Table *dp = buffer_display_table ();
|
||||
|
||||
while (i_byte < len)
|
||||
{
|
||||
int bytes, thiswidth;
|
||||
Lisp_Object val;
|
||||
int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
|
||||
|
||||
if (dp)
|
||||
{
|
||||
val = DISP_CHAR_VECTOR (dp, c);
|
||||
if (VECTORP (val))
|
||||
thiswidth = XVECTOR (val)->size;
|
||||
else
|
||||
thiswidth = CHAR_WIDTH (c);
|
||||
}
|
||||
else
|
||||
{
|
||||
thiswidth = CHAR_WIDTH (c);
|
||||
}
|
||||
|
||||
if (precision > 0
|
||||
&& (width + thiswidth > precision))
|
||||
{
|
||||
*nchars = i;
|
||||
*nbytes = i_byte;
|
||||
return width;
|
||||
}
|
||||
i++;
|
||||
i_byte += bytes;
|
||||
width += thiswidth;
|
||||
}
|
||||
|
||||
if (precision > 0)
|
||||
{
|
||||
*nchars = i;
|
||||
*nbytes = i_byte;
|
||||
}
|
||||
|
||||
return width;
|
||||
}
|
||||
|
||||
/* Return width of Lisp string STRING when displayed in the current
|
||||
buffer. The width is measured by how many columns it occupies on
|
||||
the screen while paying attention to compositions. If PRECISION >
|
||||
0, return the width of longest substring that doesn't exceed
|
||||
PRECISION, and set number of characters and bytes of the substring
|
||||
in *NCHARS and *NBYTES respectively. */
|
||||
|
||||
int
|
||||
lisp_string_width (string, precision, nchars, nbytes)
|
||||
Lisp_Object string;
|
||||
int precision, *nchars, *nbytes;
|
||||
{
|
||||
int len = XSTRING (string)->size;
|
||||
int len_byte = STRING_BYTES (XSTRING (string));
|
||||
unsigned char *str = XSTRING (string)->data;
|
||||
int i = 0, i_byte = 0;
|
||||
int width = 0;
|
||||
struct Lisp_Char_Table *dp = buffer_display_table ();
|
||||
|
||||
while (i < len)
|
||||
{
|
||||
int chars, bytes, thiswidth;
|
||||
Lisp_Object val;
|
||||
int cmp_id;
|
||||
int ignore, end;
|
||||
|
||||
if (find_composition (i, -1, &ignore, &end, &val, string)
|
||||
&& ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
|
||||
>= 0))
|
||||
{
|
||||
thiswidth = composition_table[cmp_id]->width;
|
||||
chars = end - i;
|
||||
bytes = string_char_to_byte (string, end) - i_byte;
|
||||
}
|
||||
else if (dp)
|
||||
{
|
||||
int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
|
||||
|
||||
chars = 1;
|
||||
val = DISP_CHAR_VECTOR (dp, c);
|
||||
if (VECTORP (val))
|
||||
thiswidth = XVECTOR (val)->size;
|
||||
else
|
||||
thiswidth = CHAR_WIDTH (c);
|
||||
}
|
||||
else
|
||||
{
|
||||
int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
|
||||
|
||||
chars = 1;
|
||||
thiswidth = CHAR_WIDTH (c);
|
||||
}
|
||||
|
||||
if (precision > 0
|
||||
&& (width + thiswidth > precision))
|
||||
{
|
||||
*nchars = i;
|
||||
*nbytes = i_byte;
|
||||
return width;
|
||||
}
|
||||
i += chars;
|
||||
i_byte += bytes;
|
||||
width += thiswidth;
|
||||
}
|
||||
|
||||
if (precision > 0)
|
||||
{
|
||||
*nchars = i;
|
||||
*nbytes = i_byte;
|
||||
}
|
||||
|
||||
return width;
|
||||
}
|
||||
|
||||
DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
|
||||
doc: /* Return width of STRING when displayed in the current buffer.
|
||||
Width is measured by how many columns it occupies on the screen.
|
||||
When calculating width of a multibyte character in STRING,
|
||||
only the base leading-code is considered; the validity of
|
||||
the following bytes is not checked. Tabs in STRING are always
|
||||
taken to occupy `tab-width' columns. */)
|
||||
(str)
|
||||
Lisp_Object str;
|
||||
{
|
||||
Lisp_Object val;
|
||||
|
||||
CHECK_STRING (str);
|
||||
XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
|
||||
return val;
|
||||
}
|
||||
|
||||
DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
|
||||
doc: /* Return the direction of CHAR.
|
||||
The returned value is 0 for left-to-right and 1 for right-to-left. */)
|
||||
(ch)
|
||||
Lisp_Object ch;
|
||||
{
|
||||
int c;
|
||||
|
||||
CHECK_CHARACTER (ch);
|
||||
c = XINT (ch);
|
||||
return CHAR_TABLE_REF (Vchar_direction_table, c);
|
||||
}
|
||||
|
||||
DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
|
||||
doc: /* Return number of characters between BEG and END.
|
||||
This is now an obsolete function. We keep it just for backward compatibility. */)
|
||||
(beg, end)
|
||||
Lisp_Object beg, end;
|
||||
{
|
||||
int from, to;
|
||||
|
||||
CHECK_NUMBER_COERCE_MARKER (beg);
|
||||
CHECK_NUMBER_COERCE_MARKER (end);
|
||||
|
||||
from = min (XFASTINT (beg), XFASTINT (end));
|
||||
to = max (XFASTINT (beg), XFASTINT (end));
|
||||
|
||||
return make_number (to - from);
|
||||
}
|
||||
|
||||
/* Return the number of characters in the NBYTES bytes at PTR.
|
||||
This works by looking at the contents and checking for multibyte
|
||||
sequences while assuming that there's no invalid sequence.
|
||||
However, if the current buffer has enable-multibyte-characters =
|
||||
nil, we treat each byte as a character. */
|
||||
|
||||
int
|
||||
chars_in_text (ptr, nbytes)
|
||||
unsigned char *ptr;
|
||||
int nbytes;
|
||||
{
|
||||
/* current_buffer is null at early stages of Emacs initialization. */
|
||||
if (current_buffer == 0
|
||||
|| NILP (current_buffer->enable_multibyte_characters))
|
||||
return nbytes;
|
||||
|
||||
return multibyte_chars_in_text (ptr, nbytes);
|
||||
}
|
||||
|
||||
/* Return the number of characters in the NBYTES bytes at PTR,
   stepping through the text one multibyte sequence at a time.  The
   text is assumed to contain no invalid sequence: abort if
   MULTIBYTE_LENGTH reports 0.  This ignores
   enable-multibyte-characters.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *limit = ptr + nbytes;
  int count;

  for (count = 0; ptr < limit; count++)
    {
      int step = MULTIBYTE_LENGTH (ptr, limit);

      if (step == 0)
	abort ();
      ptr += step;
    }

  return count;
}
|
||||
|
||||
/* Parse unibyte text at STR of LEN bytes as a multibyte text, count
|
||||
characters and bytes in it, and store them in *NCHARS and *NBYTES
|
||||
respectively. On counting bytes, pay attention to that 8-bit
|
||||
characters not constructing a valid multibyte sequence are
|
||||
represented by 2-byte in a multibyte text. */
|
||||
|
||||
void
|
||||
parse_str_as_multibyte (str, len, nchars, nbytes)
|
||||
unsigned char *str;
|
||||
int len, *nchars, *nbytes;
|
||||
{
|
||||
unsigned char *endp = str + len;
|
||||
int n, chars = 0, bytes = 0;
|
||||
|
||||
if (len >= MAX_MULTIBYTE_LENGTH)
|
||||
{
|
||||
unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
|
||||
while (str < adjusted_endp)
|
||||
{
|
||||
if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
|
||||
str += n, bytes += n;
|
||||
else
|
||||
str++, bytes += 2;
|
||||
chars++;
|
||||
}
|
||||
}
|
||||
while (str < endp)
|
||||
{
|
||||
if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
|
||||
str += n, bytes += n;
|
||||
else
|
||||
str++, bytes += 2;
|
||||
chars++;
|
||||
}
|
||||
|
||||
*nchars = chars;
|
||||
*nbytes = bytes;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
|
||||
It actually converts only such 8-bit characters that don't contruct
|
||||
a multibyte sequence to multibyte forms of Latin-1 characters. If
|
||||
NCHARS is nonzero, set *NCHARS to the number of characters in the
|
||||
text. It is assured that we can use LEN bytes at STR as a work
|
||||
area and that is enough. Return the number of bytes of the
|
||||
resulting text. */
|
||||
|
||||
int
|
||||
str_as_multibyte (str, len, nbytes, nchars)
|
||||
unsigned char *str;
|
||||
int len, nbytes, *nchars;
|
||||
{
|
||||
unsigned char *p = str, *endp = str + nbytes;
|
||||
unsigned char *to;
|
||||
int chars = 0;
|
||||
int n;
|
||||
|
||||
if (nbytes >= MAX_MULTIBYTE_LENGTH)
|
||||
{
|
||||
unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
|
||||
while (p < adjusted_endp
|
||||
&& (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
|
||||
p += n, chars++;
|
||||
}
|
||||
while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
|
||||
p += n, chars++;
|
||||
if (nchars)
|
||||
*nchars = chars;
|
||||
if (p == endp)
|
||||
return nbytes;
|
||||
|
||||
to = p;
|
||||
nbytes = endp - p;
|
||||
endp = str + len;
|
||||
safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
|
||||
p = endp - nbytes;
|
||||
|
||||
if (nbytes >= MAX_MULTIBYTE_LENGTH)
|
||||
{
|
||||
unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
|
||||
while (p < adjusted_endp)
|
||||
{
|
||||
if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
|
||||
{
|
||||
while (n--)
|
||||
*to++ = *p++;
|
||||
}
|
||||
else
|
||||
{
|
||||
int c = *p++;
|
||||
c = BYTE8_TO_CHAR (c);
|
||||
to += CHAR_STRING (c, to);
|
||||
}
|
||||
}
|
||||
chars++;
|
||||
}
|
||||
while (p < endp)
|
||||
{
|
||||
if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
|
||||
{
|
||||
while (n--)
|
||||
*to++ = *p++;
|
||||
}
|
||||
else
|
||||
{
|
||||
int c = *p++;
|
||||
c = BYTE8_TO_CHAR (c);
|
||||
to += CHAR_STRING (c, to);
|
||||
}
|
||||
chars++;
|
||||
}
|
||||
if (nchars)
|
||||
*nchars = chars;
|
||||
return (to - str);
|
||||
}
|
||||
|
||||
/* Parse unibyte string at STR of LEN bytes, and return the number of
   bytes it may occupy when converted to multibyte string by
   `str_to_multibyte'.  A byte below 0x80 counts as one byte, any
   other byte as two.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  for (i = 0; i < len; i++)
    {
      if (str[i] >= 0x80)
	total += 2;
      else
	total += 1;
    }
  return total;
}
|
||||
|
||||
|
||||
/* Convert unibyte text at STR of BYTES bytes to a multibyte text
   that contains the same single-byte characters.  Every byte of 0x80
   or above is converted with BYTE8_TO_CHAR and stored in its
   multibyte form.  It is assured that we can use LEN bytes at STR as
   a work area and that is enough.  Return the number of bytes of the
   resulting text.  */

int
str_to_multibyte (str, len, bytes)
     unsigned char *str;
     int len, bytes;
{
  unsigned char *src = str;
  unsigned char *src_end = str + bytes;
  unsigned char *dst;

  /* A pure-ASCII prefix needs no conversion.  */
  for (; src < src_end; src++)
    if (*src >= 0x80)
      break;
  if (src == src_end)
    return bytes;

  /* Move the remaining bytes to the end of the work area, then write
     the converted text from where the prefix ended.  */
  dst = src;
  bytes = src_end - src;
  src_end = str + len;
  safe_bcopy ((char *) src, (char *) (src_end - bytes), bytes);

  for (src = src_end - bytes; src < src_end; )
    {
      int c = *src++;

      if (c >= 0x80)
	c = BYTE8_TO_CHAR (c);
      dst += CHAR_STRING (c, dst);
    }
  return (dst - str);
}
|
||||
|
||||
/* Arrange multibyte text at STR of BYTES bytes as a unibyte text, in
   place.  Each sequence whose leading byte satisfies
   CHAR_BYTE8_HEAD_P is replaced by the single byte CHAR_TO_BYTE8
   gives for it; all other sequences are copied unchanged.  Return
   the number of bytes of the resulting text.  */

int
str_as_unibyte (str, bytes)
     unsigned char *str;
     int bytes;
{
  unsigned char *p = str, *endp = str + bytes;
  unsigned char *to;
  int c, len;

  /* Nothing has to move until the first byte8 sequence.  */
  while (p < endp && ! CHAR_BYTE8_HEAD_P (*p))
    p += BYTES_BY_CHAR_HEAD (*p);

  for (to = p; p < endp; )
    {
      c = *p;
      if (CHAR_BYTE8_HEAD_P (c))
	{
	  c = STRING_CHAR_ADVANCE (p);
	  *to++ = CHAR_TO_BYTE8 (c);
	}
      else
	{
	  for (len = BYTES_BY_CHAR_HEAD (c); len > 0; len--)
	    *to++ = *p++;
	}
    }
  return (to - str);
}
|
||||
|
||||
int
|
||||
string_count_byte8 (string)
|
||||
Lisp_Object string;
|
||||
{
|
||||
int multibyte = STRING_MULTIBYTE (string);
|
||||
int nchars = XSTRING (string)->size;
|
||||
int nbytes = STRING_BYTES (XSTRING (string));
|
||||
unsigned char *p = XSTRING (string)->data;
|
||||
unsigned char *pend = p + nbytes;
|
||||
int count = 0;
|
||||
int c, len;
|
||||
|
||||
if (multibyte)
|
||||
while (p < pend)
|
||||
{
|
||||
c = *p;
|
||||
len = BYTES_BY_CHAR_HEAD (c);
|
||||
|
||||
if (CHAR_BYTE8_HEAD_P (c))
|
||||
count++;
|
||||
p += len;
|
||||
}
|
||||
else
|
||||
while (p < pend)
|
||||
{
|
||||
if (*p++ >= 0x80)
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
Lisp_Object
|
||||
string_escape_byte8 (string)
|
||||
Lisp_Object string;
|
||||
{
|
||||
int nchars = XSTRING (string)->size;
|
||||
int nbytes = STRING_BYTES (XSTRING (string));
|
||||
int multibyte = STRING_MULTIBYTE (string);
|
||||
int byte8_count;
|
||||
unsigned char *src, *src_end, *dst;
|
||||
Lisp_Object val;
|
||||
int c, len;
|
||||
|
||||
if (multibyte && nchars == nbytes)
|
||||
return string;
|
||||
|
||||
byte8_count = string_count_byte8 (string);
|
||||
|
||||
if (byte8_count == 0)
|
||||
return string;
|
||||
|
||||
if (multibyte)
|
||||
/* Convert 2-byte sequence of byte8 chars to 4-byte octal. */
|
||||
val = make_uninit_multibyte_string (nchars + byte8_count * 2,
|
||||
nbytes + byte8_count * 2);
|
||||
else
|
||||
/* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
|
||||
val = make_uninit_string (nbytes + byte8_count * 3);
|
||||
|
||||
src = XSTRING (string)->data;
|
||||
src_end = src + nbytes;
|
||||
dst = XSTRING (val)->data;
|
||||
if (multibyte)
|
||||
while (src < src_end)
|
||||
{
|
||||
c = *src;
|
||||
len = BYTES_BY_CHAR_HEAD (c);
|
||||
|
||||
if (CHAR_BYTE8_HEAD_P (c))
|
||||
{
|
||||
c = STRING_CHAR_ADVANCE (src);
|
||||
c = CHAR_TO_BYTE8 (c);
|
||||
sprintf (dst, "\\%03o", c);
|
||||
dst += 4;
|
||||
}
|
||||
else
|
||||
while (len--) *dst++ = *src++;
|
||||
}
|
||||
else
|
||||
while (src < src_end)
|
||||
{
|
||||
c = *src++;
|
||||
if (c >= 0x80)
|
||||
{
|
||||
sprintf (dst, "\\%03o", c);
|
||||
dst += 4;
|
||||
}
|
||||
else
|
||||
*dst++ = c;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
|
||||
doc: /*
|
||||
Concatenate all the argument characters and make the result a string. */)
|
||||
(n, args)
|
||||
int n;
|
||||
Lisp_Object *args;
|
||||
{
|
||||
int i;
|
||||
unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
|
||||
unsigned char *p = buf;
|
||||
int c;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
CHECK_CHARACTER (args[i]);
|
||||
c = XINT (args[i]);
|
||||
p += CHAR_STRING (c, p);
|
||||
}
|
||||
|
||||
return make_string_from_bytes ((char *) buf, n, p - buf);
|
||||
}
|
||||
|
||||
/* One-time initialization hook for this file.  It currently has
   nothing to do.  */

void
init_character_once ()
{
  /* Intentionally empty.  */
}
|
||||
|
||||
#ifdef emacs
|
||||
|
||||
void
|
||||
syms_of_character ()
|
||||
{
|
||||
DEFSYM (Qcharacterp, "characterp");
|
||||
DEFSYM (Qauto_fill_chars, "auto-fill-chars");
|
||||
|
||||
staticpro (&Vchar_unify_table);
|
||||
Vchar_unify_table = Qnil;
|
||||
|
||||
defsubr (&Smax_char);
|
||||
defsubr (&Scharacterp);
|
||||
defsubr (&Sunibyte_char_to_multibyte);
|
||||
defsubr (&Smultibyte_char_to_unibyte);
|
||||
defsubr (&Schar_bytes);
|
||||
defsubr (&Schar_width);
|
||||
defsubr (&Sstring_width);
|
||||
defsubr (&Schar_direction);
|
||||
defsubr (&Schars_in_region);
|
||||
defsubr (&Sstring);
|
||||
|
||||
DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
|
||||
doc: /*
|
||||
Vector of cons cell of a symbol and translation table ever defined.
|
||||
An ID of a translation table is an index of this vector. */);
|
||||
Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
|
||||
|
||||
DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
|
||||
doc: /*
|
||||
A char-table for characters which invoke auto-filling.
|
||||
Such characters have value t in this table. */);
|
||||
Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
|
||||
CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
|
||||
CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
|
||||
|
||||
DEFVAR_LISP ("char-width-table", &Vchar_width_table,
|
||||
doc: /*
|
||||
A char-table for width (columns) of each character. */);
|
||||
Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
|
||||
|
||||
DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
|
||||
doc: /* A char-table for direction of each character. */);
|
||||
Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
|
||||
|
||||
DEFVAR_LISP ("printable-chars", &Vprintable_chars,
|
||||
doc: /* A char-table for each printable character. */);
|
||||
Vprintable_chars = Fmake_char_table (Qnil, Qt);
|
||||
}
|
||||
|
||||
#endif /* emacs */
|
||||
530
src/character.h
Normal file
530
src/character.h
Normal file
|
|
@ -0,0 +1,530 @@
|
|||
/* Header for multibyte character handler.
|
||||
Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
|
||||
Licensed to the Free Software Foundation.
|
||||
Copyright (C) 2001, 2002
|
||||
National Institute of Advanced Industrial Science and Technology (AIST)
|
||||
Registration Number H13PRO009
|
||||
|
||||
This file is part of GNU Emacs.
|
||||
|
||||
GNU Emacs is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Emacs is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Emacs; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
#ifndef EMACS_CHARACTER_H
|
||||
#define EMACS_CHARACTER_H
|
||||
|
||||
/* Character code range and the byte sequence that represents it:

	0-7F		0xxxxxxx
			00..7F
	80-7FF		110xxxxx 10xxxxxx
			C2..DF   80..BF
	800-FFFF	1110xxxx 10xxxxxx 10xxxxxx
			E0..EF   80..BF   80..BF
	10000-1FFFFF	11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			F0..F7   80..BF   80..BF   80..BF
	200000-3FFF7F	11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
			F8       80..8F   80..BF   80..BF   80..BF
	invalid		11111001
			F9
	invalid		1111101x
			FA..FB
	invalid		111111xx
			FC..FE

	raw-8-bit
	3FFF80-3FFFFF	1100000x 10xxxxxx
			C0..C1   80..BF

*/
|
||||
|
||||
/* The maximum character code, ((1 << CHARACTERBITS) - 1).  */
#define MAX_CHAR		0x3FFFFF

/* The maximum Unicode character code.  */
#define MAX_UNICODE_CHAR	0x10FFFF

/* MAX_N_BYTE_CHAR is the largest character code whose multibyte form
   occupies N bytes.  */
#define MAX_1_BYTE_CHAR		0x7F
#define MAX_2_BYTE_CHAR		0x7FF
#define MAX_3_BYTE_CHAR		0xFFFF
#define MAX_4_BYTE_CHAR		0x1FFFFF
#define MAX_5_BYTE_CHAR		0x3FFF7F
|
||||
|
||||
/* Convert between a raw 8-bit byte and the character code that
   represents it (offset 0x3FFF00).  */
#define BYTE8_TO_CHAR(byte)	((byte) + 0x3FFF00)
#define CHAR_TO_BYTE8(c)	((c) - 0x3FFF00)

/* Nonzero iff character code C lies above MAX_5_BYTE_CHAR, i.e. it is
   a raw 8-bit character.  */
#define CHAR_BYTE8_P(c)		((c) > MAX_5_BYTE_CHAR)

/* Nonzero iff BYTE is 0xC0 or 0xC1, the leading bytes of the
   multibyte form of a raw 8-bit character.  */
#define CHAR_BYTE8_HEAD_P(byte)	((byte) == 0xC0 || (byte) == 0xC1)
|
||||
|
||||
/* The maximum number of bytes in the multibyte form of one
   character.  */
#define MAX_MULTIBYTE_LENGTH	5
|
||||
|
||||
/* Return a Lisp character whose code is C.  */
#define make_char(c)	make_number (c)

/* Nonzero iff C, taken as an unsigned value, is below 0x80, i.e. it
   is an ASCII byte.  */
#define ASCII_BYTE_P(c)	((unsigned) (c) < 0x80)
|
||||
|
||||
/* Nonzero iff Lisp object X is a character: a non-negative integer
   not greater than MAX_CHAR.  */
#define CHARACTERP(x)	(NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)

/* Nonzero iff C is valid as a character code.  GENERICP is not used
   now.  It will be removed in the future.
   NOTE(review): this expands to CHARACTERP, which applies Lisp object
   accessors to its argument -- confirm what callers pass as C.  */
#define CHAR_VALID_P(c, genericp)	CHARACTERP (c)
|
||||
|
||||
/* Check that Lisp object X is a character.  If it is not, X is
   replaced by the value returned from wrong_type_argument, so X must
   be an lvalue.  */
#define CHECK_CHARACTER(x)					\
  do {								\
    if (! CHARACTERP (x))					\
      x = wrong_type_argument (Qcharacterp, (x));		\
  } while (0)
|
||||
|
||||
/* Nonzero iff C is an ASCII character (code below 0x80).  */
#define ASCII_CHAR_P(c)		((unsigned) (c) < 0x80)

/* Nonzero iff C is a character of code less than 0x100.  */
#define SINGLE_BYTE_CHAR_P(c)	((unsigned) (c) < 0x100)
|
||||
|
||||
/* Nonzero if character C has a printable glyph: either C is in the
   range 32..126, or its entry in Vprintable_chars is non-nil.  */
#define CHAR_PRINTABLE_P(c)					\
  (((c) >= 32 && (c) < 127)					\
   || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))
|
||||
|
||||
/* How many bytes C occupies in a multibyte buffer.  A code above
   MAX_5_BYTE_CHAR occupies 2 bytes.  C is evaluated more than
   once.  */
#define CHAR_BYTES(c)				\
  ((c) <= MAX_1_BYTE_CHAR ? 1			\
   : (c) <= MAX_2_BYTE_CHAR ? 2			\
   : (c) <= MAX_3_BYTE_CHAR ? 3			\
   : (c) <= MAX_4_BYTE_CHAR ? 4			\
   : (c) <= MAX_5_BYTE_CHAR ? 5			\
   : 2)
|
||||
|
||||
/* Store multibyte form of the character C in P.  The caller should
   allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
   Returns the length of the multibyte form.

   Forms of one to three bytes, and the 2-byte form used for codes
   above MAX_5_BYTE_CHAR, are stored inline; the 4-byte and 5-byte
   forms are left to char_string_with_unification.  C and P are
   evaluated more than once.  */

#define CHAR_STRING(c, p)					\
  ((unsigned) (c) <= MAX_1_BYTE_CHAR				\
   ? ((p)[0] = (c),						\
      1)							\
   : (unsigned) (c) <= MAX_2_BYTE_CHAR				\
   ? ((p)[0] = (0xC0 | ((c) >> 6)),				\
      (p)[1] = (0x80 | ((c) & 0x3F)),				\
      2)							\
   : (unsigned) (c) <= MAX_3_BYTE_CHAR				\
   ? ((p)[0] = (0xE0 | ((c) >> 12)),				\
      (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),			\
      (p)[2] = (0x80 | ((c) & 0x3F)),				\
      3)							\
   : (unsigned) (c) <= MAX_5_BYTE_CHAR				\
   ? char_string_with_unification (c, p, NULL)			\
   : ((p)[0] = (0xC0 | (((c) >> 6) & 0x01)),			\
      (p)[1] = (0x80 | ((c) & 0x3F)),				\
      2))
|
||||
|
||||
|
||||
/* Like CHAR_STRING, but advance P to the end of the multibyte form
   instead of leaving it in place.  P must be a modifiable pointer
   lvalue.  Unlike CHAR_STRING, the value of the expression is not
   the same in every branch, so it should not be relied on.  */

#define CHAR_STRING_ADVANCE(c, p)				\
  ((unsigned) (c) <= MAX_1_BYTE_CHAR				\
   ? (*(p)++ = (c))						\
   : (unsigned) (c) <= MAX_2_BYTE_CHAR				\
   ? (*(p)++ = (0xC0 | ((c) >> 6)),				\
      *(p)++ = (0x80 | ((c) & 0x3F)))				\
   : (unsigned) (c) <= MAX_3_BYTE_CHAR				\
   ? (*(p)++ = (0xE0 | ((c) >> 12)),				\
      *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),			\
      *(p)++ = (0x80 | ((c) & 0x3F)))				\
   : (unsigned) (c) <= MAX_5_BYTE_CHAR				\
   ? char_string_with_unification (c, p, &p)			\
   : (*(p)++ = (0xC0 | (((c) >> 6) & 0x01)),			\
      *(p)++ = (0x80 | ((c) & 0x3F))))
|
||||
|
||||
|
||||
/* Nonzero iff BYTE starts a character in a multibyte form. */
|
||||
/* A byte whose top two bits are 10 is the only kind that is not a
   character head.  */
#define CHAR_HEAD_P(byte) ((0xC0 & (byte)) != 0x80)
|
||||
|
||||
/* Nonzero iff BYTE starts a non-ASCII character in a multibyte
|
||||
form. */
|
||||
/* True exactly when the top two bits of BYTE are both set.  */
#define LEADING_CODE_P(byte) ((0xC0 & (byte)) == 0xC0)
|
||||
|
||||
/* Just kept for backward compatibility. This macro will be removed
|
||||
in the future. */
|
||||
/* Object-like alias: an argument list written after
   BASE_LEADING_CODE_P is passed on to LEADING_CODE_P unchanged.  */
#define BASE_LEADING_CODE_P LEADING_CODE_P
|
||||
|
||||
/* How many bytes a character that starts with BYTE occupies in a
   multibyte form.  Only the high bits of BYTE are examined and no
   validation is done; in particular a trailing byte (0x80..0xBF)
   yields 2.  BYTE is evaluated more than once.  */

#define BYTES_BY_CHAR_HEAD(byte)        \
  (!((byte) & 0x80) ? 1                 \
   : !((byte) & 0x20) ? 2               \
   : !((byte) & 0x10) ? 3               \
   : !((byte) & 0x08) ? 4               \
   : 5)
|
||||
|
||||
|
||||
/* Return the length of the multibyte form at string STR, assuming
   that STR points to a valid multibyte form.  LEN is not used in the
   expansion.  As this macro isn't necessary anymore, all callers
   will be changed to use BYTES_BY_CHAR_HEAD directly in the
   future.  */

#define MULTIBYTE_FORM_LENGTH(str, len)         \
  (BYTES_BY_CHAR_HEAD (*(str)))
|
||||
|
||||
/* Set BYTES to the byte length of the character at STR, assuming
   that STR points to a valid multibyte form.  LENGTH is not used in
   the expansion.  BYTES must be an lvalue.  As this macro isn't
   necessary anymore, all callers will be changed to use
   BYTES_BY_CHAR_HEAD directly in the future.  */

#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
  ((bytes) = BYTES_BY_CHAR_HEAD (*(str)))
|
||||
|
||||
/* The byte length of the multibyte form at P in a byte sequence that
   ends at PEND (PEND points just past the last byte).  If P doesn't
   point to a valid multibyte form, or the form would extend to or
   beyond PEND, return 0.

   Each trailing byte is checked to have the bit pattern 10xxxxxx.  A
   five-byte form is accepted only when the first byte is exactly
   0xF8 and the second byte is in the range 0x80..0x8F.  The
   comparison with 0xF8 expects P to point to unsigned bytes.  P and
   PEND are evaluated more than once.  */

#define MULTIBYTE_LENGTH(p, pend)                                       \
  ((p) >= (pend) ? 0                                                    \
   : !((p)[0] & 0x80) ? 1                                               \
   : (((p) + 1 >= (pend)) || (((p)[1] & 0xC0) != 0x80)) ? 0             \
   : ((p)[0] & 0xE0) == 0xC0 ? 2                                        \
   : (((p) + 2 >= (pend)) || (((p)[2] & 0xC0) != 0x80)) ? 0             \
   : ((p)[0] & 0xF0) == 0xE0 ? 3                                        \
   : (((p) + 3 >= (pend)) || (((p)[3] & 0xC0) != 0x80)) ? 0             \
   : ((p)[0] & 0xF8) == 0xF0 ? 4                                        \
   : (((p) + 4 >= (pend)) || (((p)[4] & 0xC0) != 0x80)) ? 0             \
   : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5                      \
   : 0)
|
||||
|
||||
|
||||
/* Like MULTIBYTE_LENGTH but don't check the ending address: up to
   five bytes starting at P may be read, so they must all be
   accessible.  Returns 0 if P doesn't point to a valid multibyte
   form.  The comparison with 0xF8 expects P to point to unsigned
   bytes.  P is evaluated more than once.  */

#define MULTIBYTE_LENGTH_NO_CHECK(p)                            \
  (!((p)[0] & 0x80) ? 1                                         \
   : ((p)[1] & 0xC0) != 0x80 ? 0                                \
   : ((p)[0] & 0xE0) == 0xC0 ? 2                                \
   : ((p)[2] & 0xC0) != 0x80 ? 0                                \
   : ((p)[0] & 0xF0) == 0xE0 ? 3                                \
   : ((p)[3] & 0xC0) != 0x80 ? 0                                \
   : ((p)[0] & 0xF8) == 0xF0 ? 4                                \
   : ((p)[4] & 0xC0) != 0x80 ? 0                                \
   : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5              \
   : 0)
|
||||
|
||||
|
||||
/* Return the character code of the character whose multibyte form is
   at P.  The argument LEN is ignored.  It will be removed in the
   future.

   One-, two- and three-byte forms are decoded inline; a two-byte
   form whose first byte is below 0xC2 has 0x3FFF80 added to the
   decoded value.  Any other form (first byte with the 0x10 bit set)
   is handed to string_char_with_unification.  P is evaluated more
   than once.  */

#define STRING_CHAR(p, len)                                             \
  (!((p)[0] & 0x80)                                                     \
   ? (p)[0]                                                             \
   : ! ((p)[0] & 0x20)                                                  \
   ? (((((p)[0] & 0x1F) << 6)                                           \
       | ((p)[1] & 0x3F))                                               \
      + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))               \
   : ! ((p)[0] & 0x10)                                                  \
   ? ((((p)[0] & 0x0F) << 12)                                           \
      | (((p)[1] & 0x3F) << 6)                                          \
      | ((p)[2] & 0x3F))                                                \
   : string_char_with_unification ((p), NULL, NULL))
|
||||
|
||||
|
||||
/* Like STRING_CHAR but also set ACTUAL_LEN to the length of the
   multibyte form.  The argument LEN is ignored.  It will be removed
   in the future.

   ACTUAL_LEN must be an lvalue; in the inline branches it is
   assigned directly, otherwise its address is passed to
   string_char_with_unification.  P is evaluated more than once.  */

#define STRING_CHAR_AND_LENGTH(p, len, actual_len)                      \
  (!((p)[0] & 0x80)                                                     \
   ? ((actual_len) = 1, (p)[0])                                         \
   : ! ((p)[0] & 0x20)                                                  \
   ? ((actual_len) = 2,                                                 \
      (((((p)[0] & 0x1F) << 6)                                          \
        | ((p)[1] & 0x3F))                                              \
       + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)))             \
   : ! ((p)[0] & 0x10)                                                  \
   ? ((actual_len) = 3,                                                 \
      ((((p)[0] & 0x0F) << 12)                                          \
       | (((p)[1] & 0x3F) << 6)                                         \
       | ((p)[2] & 0x3F)))                                              \
   : string_char_with_unification ((p), NULL, &(actual_len)))
|
||||
|
||||
|
||||
/* Like STRING_CHAR but advance P to the end of the multibyte form.
   P must be a modifiable pointer lvalue and is evaluated more than
   once.

   In the two-byte branch the 0x3FFF80 offset is added with `+', as
   in STRING_CHAR; this gives the same value as OR-ing it in, because
   the decoded value is below 0x80 whenever the offset applies.  */

#define STRING_CHAR_ADVANCE(p)                                          \
  (!((p)[0] & 0x80)                                                     \
   ? *(p)++                                                             \
   : ! ((p)[0] & 0x20)                                                  \
   ? ((p) += 2,                                                         \
      (((((p)[-2] & 0x1F) << 6)                                         \
        | ((p)[-1] & 0x3F))                                             \
       + (((unsigned char) (p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))            \
   : ! ((p)[0] & 0x10)                                                  \
   ? ((p) += 3,                                                         \
      ((((p)[-3] & 0x0F) << 12)                                         \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | ((p)[-1] & 0x3F)))                                             \
   : string_char_with_unification ((p), &(p), NULL))
|
||||
|
||||
|
||||
/* Fetch the "next" character from Lisp string STRING at byte position
   BYTEIDX, character position CHARIDX.  Store it into OUTPUT.

   All the args must be side-effect-free.
   BYTEIDX and CHARIDX must be lvalues;
   we increment them past the character fetched.

   If STRING is multibyte the character is decoded with
   STRING_CHAR_AND_LENGTH; otherwise a single byte is fetched.

   The expansion ends with a dangling `else', so the invocation must
   be followed by a statement (normally just `;'), which becomes the
   never-executed else branch.  */

#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)           \
  if (1)                                                                      \
    {                                                                         \
      (CHARIDX)++;                                                            \
      if (STRING_MULTIBYTE (STRING))                                          \
        {                                                                     \
          unsigned char *fetch_ptr_ = &XSTRING (STRING)->data[BYTEIDX];       \
          int fetch_len_;                                                     \
                                                                              \
          (OUTPUT) = STRING_CHAR_AND_LENGTH (fetch_ptr_, 0, fetch_len_);      \
          (BYTEIDX) += fetch_len_;                                            \
        }                                                                     \
      else                                                                    \
        (OUTPUT) = XSTRING (STRING)->data[(BYTEIDX)++];                       \
    }                                                                         \
  else
|
||||
|
||||
|
||||
/* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte, so
   the character is always decoded with STRING_CHAR_AND_LENGTH.
   BYTEIDX and CHARIDX must be lvalues.

   The expansion ends with a dangling `else', so the invocation must
   be followed by a statement (normally just `;').  */

#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX)  \
  if (1)                                                                      \
    {                                                                         \
      unsigned char *fetch_ptr_ = &XSTRING (STRING)->data[BYTEIDX];           \
      int fetch_len_;                                                         \
                                                                              \
      (OUTPUT) = STRING_CHAR_AND_LENGTH (fetch_ptr_, 0, fetch_len_);          \
      (BYTEIDX) += fetch_len_;                                                \
      (CHARIDX)++;                                                            \
    }                                                                         \
  else
|
||||
|
||||
|
||||
/* Like FETCH_STRING_CHAR_ADVANCE but fetch the character from the
   current buffer at byte position BYTEIDX.  If the buffer's
   enable_multibyte_characters is non-nil the character is decoded
   with STRING_CHAR_AND_LENGTH; otherwise a single byte is fetched.
   BYTEIDX and CHARIDX must be lvalues.

   The expansion ends with a dangling `else', so the invocation must
   be followed by a statement (normally just `;').  */

#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)                          \
  if (1)                                                                      \
    {                                                                         \
      (CHARIDX)++;                                                            \
      if (!NILP (current_buffer->enable_multibyte_characters))                \
        {                                                                     \
          unsigned char *fetch_ptr_ = BYTE_POS_ADDR (BYTEIDX);                \
          int fetch_len_;                                                     \
                                                                              \
          (OUTPUT) = STRING_CHAR_AND_LENGTH (fetch_ptr_, 0, fetch_len_);      \
          (BYTEIDX) += fetch_len_;                                            \
        }                                                                     \
      else                                                                    \
        {                                                                     \
          (OUTPUT) = *(BYTE_POS_ADDR (BYTEIDX));                              \
          (BYTEIDX)++;                                                        \
        }                                                                     \
    }                                                                         \
  else
|
||||
|
||||
|
||||
/* Like FETCH_CHAR_ADVANCE but assumes the current buffer is
   multibyte, so the character is always decoded with
   STRING_CHAR_AND_LENGTH.  BYTEIDX and CHARIDX must be lvalues.

   The expansion ends with a dangling `else', so the invocation must
   be followed by a statement (normally just `;').  */

#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)                 \
  if (1)                                                                      \
    {                                                                         \
      unsigned char *fetch_ptr_ = BYTE_POS_ADDR (BYTEIDX);                    \
      int fetch_len_;                                                         \
                                                                              \
      (OUTPUT) = STRING_CHAR_AND_LENGTH (fetch_ptr_, 0, fetch_len_);          \
      (BYTEIDX) += fetch_len_;                                                \
      (CHARIDX)++;                                                            \
    }                                                                         \
  else
|
||||
|
||||
|
||||
/* Increase the buffer byte position POS_BYTE of the current buffer to
   the next character boundary, by adding BYTES_BY_CHAR_HEAD of the
   byte at POS_BYTE.  POS_BYTE must be an lvalue.  No range checking
   of POS_BYTE.  */

#define INC_POS(pos_byte)                                       \
  do {                                                          \
    unsigned char *pos_ptr_ = BYTE_POS_ADDR (pos_byte);         \
    (pos_byte) += BYTES_BY_CHAR_HEAD (*pos_ptr_);               \
  } while (0)
|
||||
|
||||
|
||||
/* Decrease the buffer byte position POS_BYTE of the current buffer to
   the previous character boundary: step back one byte, then keep
   stepping back while the byte there is not a character head
   (CHAR_HEAD_P).  The byte address is computed from BEG_ADDR, adding
   GAP_SIZE for positions at or after GPT_BYTE.  POS_BYTE must be an
   lvalue.  No range checking of POS_BYTE.  */

#define DEC_POS(pos_byte)                                       \
  do {                                                          \
    unsigned char *pos_ptr_;                                    \
                                                                \
    (pos_byte)--;                                               \
    if ((pos_byte) < GPT_BYTE)                                  \
      pos_ptr_ = BEG_ADDR + (pos_byte) - 1;                     \
    else                                                        \
      pos_ptr_ = BEG_ADDR + GAP_SIZE + (pos_byte) - 1;          \
    while (!CHAR_HEAD_P (*pos_ptr_))                            \
      {                                                         \
        pos_ptr_--;                                             \
        (pos_byte)--;                                           \
      }                                                         \
  } while (0)
|
||||
|
||||
/* Increment both CHARPOS and BYTEPOS, each in the appropriate way:
   CHARPOS always advances by one; BYTEPOS advances by one byte when
   the current buffer's enable_multibyte_characters is nil, and by
   INC_POS otherwise.  Both arguments must be lvalues.  */

#define INC_BOTH(charpos, bytepos)                              \
  do                                                            \
    {                                                           \
      (charpos)++;                                              \
      if (NILP (current_buffer->enable_multibyte_characters))   \
        (bytepos)++;                                            \
      else                                                      \
        INC_POS ((bytepos));                                    \
    }                                                           \
  while (0)
|
||||
|
||||
|
||||
/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way:
   CHARPOS always goes back by one; BYTEPOS goes back by one byte
   when the current buffer's enable_multibyte_characters is nil, and
   by DEC_POS otherwise.  Both arguments must be lvalues.  */

#define DEC_BOTH(charpos, bytepos)                              \
  do                                                            \
    {                                                           \
      (charpos)--;                                              \
      if (NILP (current_buffer->enable_multibyte_characters))   \
        (bytepos)--;                                            \
      else                                                      \
        DEC_POS ((bytepos));                                    \
    }                                                           \
  while (0)
|
||||
|
||||
|
||||
/* Increase the byte position POS_BYTE of buffer BUF to the next
   character boundary.  This macro relies on the fact that *GPT_ADDR
   and *Z_ADDR are always accessible and the values are '\0'.
   POS_BYTE must be an lvalue.  No range checking of POS_BYTE.  */

#define BUF_INC_POS(buf, pos_byte)                                      \
  do {                                                                  \
    unsigned char *pos_ptr_ = BUF_BYTE_ADDRESS (buf, pos_byte);         \
    (pos_byte) += BYTES_BY_CHAR_HEAD (*pos_ptr_);                       \
  } while (0)
|
||||
|
||||
|
||||
/* Decrease the byte position POS_BYTE of buffer BUF to the previous
   character boundary: step back one byte, then keep stepping back
   while the byte there is not a character head (CHAR_HEAD_P).  The
   byte address is computed from BUF_BEG_ADDR (BUF), adding
   BUF_GAP_SIZE (BUF) for positions at or after BUF_GPT_BYTE (BUF).
   POS_BYTE must be an lvalue.  No range checking of POS_BYTE.  */

#define BUF_DEC_POS(buf, pos_byte)                                      \
  do {                                                                  \
    unsigned char *pos_ptr_;                                            \
    (pos_byte)--;                                                       \
    if ((pos_byte) < BUF_GPT_BYTE (buf))                                \
      pos_ptr_ = BUF_BEG_ADDR (buf) + (pos_byte) - 1;                   \
    else                                                                \
      pos_ptr_ = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + (pos_byte) - 1; \
    while (!CHAR_HEAD_P (*pos_ptr_))                                    \
      {                                                                 \
        pos_ptr_--;                                                     \
        (pos_byte)--;                                                   \
      }                                                                 \
  } while (0)
|
||||
|
||||
|
||||
/* If Vchar_unify_table is a char-table, look up character C in it.
   When the entry is a symbol, call Funify_charset on that symbol
   (with Qnil as second argument) and look C up again.  Then, if the
   integer value of the entry is non-negative, store it into C.

   C must be an lvalue and is evaluated more than once.  The
   expansion ends with a dangling `else', so the invocation must be
   followed by a statement (normally just `;').  */

#define MAYBE_UNIFY_CHAR(c)                                     \
  if (CHAR_TABLE_P (Vchar_unify_table))                         \
    {                                                           \
      Lisp_Object unify_val_;                                   \
      int unify_code_;                                          \
                                                                \
      unify_val_ = CHAR_TABLE_REF (Vchar_unify_table, (c));     \
      if (SYMBOLP (unify_val_))                                 \
        {                                                       \
          Funify_charset (unify_val_, Qnil);                    \
          unify_val_ = CHAR_TABLE_REF (Vchar_unify_table, (c)); \
        }                                                       \
      if ((unify_code_ = XINT (unify_val_)) >= 0)               \
        (c) = unify_code_;                                      \
    }                                                           \
  else
|
||||
|
||||
/* Return the width of ASCII character C.  The width is measured by
   how many columns are occupied on the screen when displayed in the
   current buffer:

     - tab: the buffer's tab_width;
     - newline: 0;
     - other codes below 0x20, and codes from 0x7f up: 2 if the
       buffer's ctl_arrow is non-nil, else 4;
     - codes 0x20..0x7e: 1.

   C is evaluated more than once.  */

#define ASCII_CHAR_WIDTH(c)                                             \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))
|
||||
|
||||
/* Return the width of character C.  The width is measured by how
   many columns are occupied on the screen when displayed in the
   current buffer.  ASCII characters use ASCII_CHAR_WIDTH; for any
   other character the width is the integer stored for C in
   Vchar_width_table.  C is evaluated more than once.  */

#define CHAR_WIDTH(c)           \
  (ASCII_CHAR_P (c)             \
   ? ASCII_CHAR_WIDTH (c)       \
   : XINT (CHAR_TABLE_REF (Vchar_width_table, (c))))
|
||||
|
||||
extern int char_string_with_unification P_ ((int, unsigned char *,
|
||||
unsigned char **));
|
||||
extern int string_char_with_unification P_ ((unsigned char *,
|
||||
unsigned char **, int *));
|
||||
|
||||
extern int translate_char P_ ((Lisp_Object, int c));
|
||||
extern int char_printable_p P_ ((int c));
|
||||
extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *));
|
||||
extern int parse_str_to_multibyte P_ ((unsigned char *, int));
|
||||
extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
|
||||
extern int str_to_multibyte P_ ((unsigned char *, int, int));
|
||||
extern int str_as_unibyte P_ ((unsigned char *, int));
|
||||
extern int strwidth P_ ((unsigned char *, int));
|
||||
extern int c_string_width P_ ((unsigned char *, int, int, int *, int *));
|
||||
extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
|
||||
|
||||
extern Lisp_Object Vprintable_chars;
|
||||
|
||||
extern Lisp_Object Qcharacterp, Qauto_fill_chars;
|
||||
extern Lisp_Object Vtranslation_table_vector;
|
||||
extern Lisp_Object Vchar_width_table;
|
||||
extern Lisp_Object Vchar_direction_table;
|
||||
extern Lisp_Object Vchar_unify_table;
|
||||
|
||||
/* Return the translation table of id number ID: the cdr of element
   ID of the vector Vtranslation_table_vector.  No range checking of
   ID.  */
#define GET_TRANSLATION_TABLE(id) \
  (XCDR (XVECTOR (Vtranslation_table_vector)->contents[(id)]))
|
||||
|
||||
/* A char-table for characters which may invoke auto-filling. */
|
||||
extern Lisp_Object Vauto_fill_chars;
|
||||
|
||||
/* Copy LEN bytes from FROM to TO, lowest address first.  This macro
   should be used only when a caller knows that LEN is short and the
   obvious copy loop is faster than calling bcopy which has some
   overhead.  Copying a multibyte sequence of a character is the
   typical case.

   Each argument is evaluated exactly once.  Nothing is copied when
   LEN is zero or negative.  The local names carry a `bcopy_' prefix
   so that a caller's own variables (for instance a length named `i')
   are not captured by the expansion.  */

#define BCOPY_SHORT(from, to, len)                              \
  do {                                                          \
    int bcopy_left_ = (len);                                    \
    unsigned char *bcopy_src_ = (from), *bcopy_dst_ = (to);     \
    while (bcopy_left_-- > 0)                                   \
      *bcopy_dst_++ = *bcopy_src_++;                            \
  } while (0)
|
||||
|
||||
/* Intern NAME, store the resulting symbol in SYM, and pass SYM's
   address to staticpro.  SYM must be an lvalue.  */
#define DEFSYM(sym, name)       \
  do { (sym) = intern ((name)); staticpro (&(sym)); } while (0)
|
||||
|
||||
#endif /* EMACS_CHARACTER_H */
|
||||
Loading…
Add table
Add a link
Reference in a new issue