mirror of
git://git.sv.gnu.org/emacs.git
synced 2026-01-12 22:40:46 -08:00
* configure.ac (PTY_OPEN, GC_MARK_SECONDARY_STACK):
Use bool for boolean.
* lib-src/emacsclient.c, lib-src/etags.c, lib-src/hexl.c (FALSE, TRUE):
Remove. All uses replaced with uncapitalized version.
* lib-src/emacsclient.c (message):
* lib-src/etags.c (make_tag, pfnote, consider_token, make_C_tag, lang_names):
* lib-src/hexl.c (un_flag, iso_flag, endian):
* lib-src/pop.c (pop_debug, pop_open, pop_multi_first, pop_multi_next)
(pop_trash):
Use bool for boolean.
* lib-src/etags.c (bool): Remove.
* lib-src/etags.c (globals, members, declarations, no_line_directive)
(no_duplicates): Use 'int' for boolean values that getopt requires
to be 'int'. Formerly, these were 'bool' and 'bool' was 'int',
but we can no longer rely on this implementation.
* lib-src/pop.h (struct _popserver): Use bool_bf for boolean bit-fields.
* lwlib/xlwmenuP.h (XlwMenu_part): Use bool_bf for boolean bit-fields.
* src/atimer.h, src/lisp.h, src/syssignal.h, src/syswait.h, src/unexelf.c:
No need to include <stdbool.h>, since conf_post.h does it now.
* src/buffer.h (BUF_COMPUTE_UNCHANGED, DECODE_POSITION)
(BUFFER_CHECK_INDIRECTION, GET_OVERLAYS_AT, PER_BUFFER_VALUE_P)
(SET_PER_BUFFER_VALUE_P):
* src/ccl.c, src/ccl.h (setup_ccl_program):
* src/ccl.h (CHECK_CCL_PROGRAM):
* src/character.h (MAKE_CHAR_UNIBYTE, CHECK_CHARACTER_CAR)
(CHECK_CHARACTER_CDR, CHAR_STRING_ADVANCE, NEXT_CHAR_BOUNDARY)
(PREV_CHAR_BOUNDARY, FETCH_STRING_CHAR_ADVANCE)
(FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE)
(FETCH_STRING_CHAR_ADVANCE_NO_CHECK, FETCH_CHAR_ADVANCE)
(FETCH_CHAR_ADVANCE_NO_CHECK, INC_POS, DEC_POS, INC_BOTH)
(DEC_BOTH, BUF_INC_POS, BUF_DEC_POS):
* src/charset.h (CHECK_CHARSET, CHECK_CHARSET_GET_ID)
(CHECK_CHARSET_GET_ATTR, CHECK_CHARSET_GET_CHARSET)
(CHARSET_FAST_MAP_SET):
* src/coding.c (decode_coding_ccl, encode_coding_ccl):
* src/coding.h (CHECK_CODING_SYSTEM, CHECK_CODING_SYSTEM_GET_SPEC)
(CHECK_CODING_SYSTEM_GET_ID, SJIS_TO_JIS, SJIS_TO_JIS2)
(JIS_TO_SJIS, JIS_TO_SJIS2, ENCODE_FILE, DECODE_FILE)
(ENCODE_SYSTEM, DECODE_SYSTEM, ENCODE_UTF_8)
(decode_coding_c_string):
* src/composite.h (COMPOSITION_DECODE_REFS, COMPOSITION_DECODE_RULE):
* src/conf_post.h (has_attribute):
* src/dispextern.h (trace_redisplay_p):
(INC_TEXT_POS, DEC_TEXT_POS, SET_GLYPH_FROM_GLYPH_CODE)
(SET_CHAR_GLYPH, SET_CHAR_GLYPH_FROM_GLYPH)
(SET_GLYPH_FROM_CHAR_GLYPH):
(WINDOW_WANTS_MODELINE_P, WINDOW_WANTS_HEADER_LINE_P)
(FACE_SUITABLE_FOR_ASCII_CHAR_P, FACE_SUITABLE_FOR_CHAR_P)
(PRODUCE_GLYPHS, reset_mouse_highlight, in_display_vector_p)
(cursor_in_mouse_face_p):
* src/dispnew.c (adjust_glyph_matrix, clear_glyph_matrix_rows)
(blank_row, prepare_desired_row)
(build_frame_matrix_from_leaf_window, make_current)
(mirror_make_current, mirrored_line_dance, mirror_line_dance)
(update_window, scrolling_window, update_frame_line):
* src/disptab.h (GLYPH_FOLLOW_ALIASES):
* src/editfns.c (Fformat):
* src/font.h (FONT_WEIGHT_SYMBOLIC, FONT_SLANT_SYMBOLIC)
(FONT_WIDTH_SYMBOLIC, FONT_WEIGHT_FOR_FACE, FONT_SLANT_FOR_FACE)
(FONT_WIDTH_FOR_FACE, FONT_WEIGHT_NAME_NUMERIC)
(FONT_SLANT_NAME_NUMERIC, FONT_WIDTH_NAME_NUMERIC)
(FONT_SET_STYLE, CHECK_FONT, CHECK_FONT_SPEC, CHECK_FONT_ENTITY)
(CHECK_FONT_OBJECT, CHECK_FONT_GET_OBJECT, FONT_ADD_LOG)
(FONT_DEFERRED_LOG):
* src/frame.h (FRAME_W32_P, FRAME_MSDOS_P, FRAME_WINDOW_P):
(FRAME_EXTERNAL_TOOL_BAR, FRAME_EXTERNAL_MENU_BAR, FOR_EACH_FRAME)
(FRAME_MOUSE_UPDATE):
* src/fringe.c (Fdefine_fringe_bitmap):
* src/image.c (x_create_bitmap_from_data, x_create_bitmap_mask)
(x_create_bitmap_from_xpm_data, xpm_load_image):
* src/intervals.h (INTERVAL_HAS_PARENT, INTERVAL_PARENT)
(set_interval_parent, RESET_INTERVAL, COPY_INTERVAL_CACHE)
(MERGE_INTERVAL_CACHE):
* src/keymap.h (KEYMAPP):
* src/lisp.h (eassert, USE_LSB_TAG, CHECK_LISP_OBJECT_TYPE)
(STRING_SET_UNIBYTE, STRING_SET_MULTIBYTE, DEFSYM, PSEUDOVECTORP)
(CHECK_RANGED_INTEGER, CHECK_TYPE_RANGED_INTEGER)
(CHECK_NUMBER_COERCE_MARKER, CHECK_NUMBER_OR_FLOAT_COERCE_MARKER)
(DEFVAR_LISP, DEFVAR_LISP_NOPRO, DEFVAR_BOOL, DEFVAR_INT)
(DEFVAR_BUFFER_DEFAULTS, DEFVAR_KBOARD, QUIT)
(RETURN_UNGCPRO, USE_SAFE_ALLOCA, SAFE_NALLOCA, SAFE_FREE)
(SAFE_ALLOCA_LISP, FOR_EACH_ALIST_VALUE, functionp):
* src/syntax.h (SYNTAX_ENTRY, SYNTAX_WITH_FLAGS, SYNTAX)
(UPDATE_SYNTAX_TABLE_FORWARD, UPDATE_SYNTAX_TABLE_BACKWARD)
(SETUP_BUFFER_SYNTAX_TABLE):
* src/systime.h (timespec_valid_p):
* src/term.c (save_and_enable_current_matrix):
* src/window.h (WINDOW_MENU_BAR_P, WINDOW_TOOL_BAR_P):
* src/xdisp.c (in_display_vector_p, display_tool_bar_line)
(redisplay_internal, try_window_reusing_current_matrix)
(sync_frame_with_window_matrix_rows, try_window_id)
(display_menu_bar, display_tty_menu_item, display_mode_line)
(coords_in_mouse_face_p, cursor_in_mouse_face_p):
* src/xmenu.c (xmenu_show):
* src/xterm.c (use_xim, x_term_init):
* src/xterm.h (XSync, GTK_CHECK_VERSION, use_xim, SET_SCROLL_BAR_X_WIDGET)
(struct x_bitmap_record):
Use bool for booleans.
* src/ccl.c (struct buffer_text):
* src/ccl.h (struct ccl_program):
* src/charset.h (struct charset):
* src/cm.h (struct cm):
* src/coding.h (struct iso_2022_spec, struct coding_system):
* src/dispextern.h (struct glyph, struct glyph_matrix, struct glyph_row)
(struct glyph_string, struct face, struct face_cache)
(struct bidi_string_data, struct bidi_it)
(struct draw_fringe_bitmap_params, struct it, Mouse_HLInfo)
(struct image):
* src/editfns.c (Fformat):
* src/frame.h (struct frame):
* src/fringe.c (struct fringe_bitmap):
* src/intervals.h (struct interval):
* src/keyboard.h (struct kboard):
* src/lisp.h (struct Lisp_Symbol, struct Lisp_Misc_Any, struct Lisp_Marker)
(struct Lisp_Overlay, struct Lisp_Save_Value, struct Lisp_Free)
(struct Lisp_Buffer_Local_Value, union specbinding):
* src/macfont.m (struct macfont_info):
* src/process.h (struct Lisp_Process):
* src/termchar.h (struct tty_display_info):
* src/window.h (struct window):
* src/xterm.h (struct x_output):
Use bool_bf for boolean bit-fields.
* src/ccl.c (setup_ccl_program): Now returns bool instead of -1 or 0.
All callers changed.
* src/ccl.h (struct ccl_program): Remove unused members private_state,
src_multibyte, dst_multibyte, cr_consumed, suppress_error,
eight_bit_control.
(struct ccl_spec): Remove unused members cr_carryover,
eight_bit_carryover.
* src/conf_post.h: Include <stdbool.h>.
(bool_bf): New type.
* src/dispextern.h (TRACE, PREPARE_FACE_FOR_DISPLAY):
* src/interval.h (RESET_INTERVAL, COPY_INTERVAL_CACHE, MERGE_INTERVAL_CACHE)
Surround statement macro with proper 'do { ... } while (false)' brackets.
(SET_MATRIX_ROW_ENABLED_P): Assume 2nd arg is bool.
(PRODUCE_GLYPHS): Simplify use of boolean.
* src/fileio.c (Fcopy_file):
If I is an integer, prefer 'if (I != 0)' to 'if (I)'.
* src/lisp.h (UNGCPRO): Return void, not int.
(FOR_EACH_TAIL): Use void expression, not int expression.
* src/region-cache.c: Reindent.
* src/region-cache.h: Copy comments from region-cache.c, to fix
incorrect remarks about booleans.
800 lines
25 KiB
C
800 lines
25 KiB
C
/* Header for coding system handler.
|
|
Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
|
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
|
2005, 2006, 2007, 2008, 2009, 2010, 2011
|
|
National Institute of Advanced Industrial Science and Technology (AIST)
|
|
Registration Number H14PRO021
|
|
Copyright (C) 2003
|
|
National Institute of Advanced Industrial Science and Technology (AIST)
|
|
Registration Number H13PRO009
|
|
|
|
This file is part of GNU Emacs.
|
|
|
|
GNU Emacs is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
GNU Emacs is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
|
|
|
|
#ifndef EMACS_CODING_H
|
|
#define EMACS_CODING_H
|
|
|
|
/* Index to arguments of Fdefine_coding_system_internal. */
|
|
|
|
enum define_coding_system_arg_index
|
|
{
|
|
coding_arg_name,
|
|
coding_arg_mnemonic,
|
|
coding_arg_coding_type,
|
|
coding_arg_charset_list,
|
|
coding_arg_ascii_compatible_p,
|
|
coding_arg_decode_translation_table,
|
|
coding_arg_encode_translation_table,
|
|
coding_arg_post_read_conversion,
|
|
coding_arg_pre_write_conversion,
|
|
coding_arg_default_char,
|
|
coding_arg_for_unibyte,
|
|
coding_arg_plist,
|
|
coding_arg_eol_type,
|
|
coding_arg_max
|
|
};
|
|
|
|
enum define_coding_iso2022_arg_index
|
|
{
|
|
coding_arg_iso2022_initial = coding_arg_max,
|
|
coding_arg_iso2022_reg_usage,
|
|
coding_arg_iso2022_request,
|
|
coding_arg_iso2022_flags,
|
|
coding_arg_iso2022_max
|
|
};
|
|
|
|
enum define_coding_utf8_arg_index
|
|
{
|
|
coding_arg_utf8_bom = coding_arg_max,
|
|
coding_arg_utf8_max
|
|
};
|
|
|
|
enum define_coding_utf16_arg_index
|
|
{
|
|
coding_arg_utf16_bom = coding_arg_max,
|
|
coding_arg_utf16_endian,
|
|
coding_arg_utf16_max
|
|
};
|
|
|
|
enum define_coding_ccl_arg_index
|
|
{
|
|
coding_arg_ccl_decoder = coding_arg_max,
|
|
coding_arg_ccl_encoder,
|
|
coding_arg_ccl_valids,
|
|
coding_arg_ccl_max
|
|
};
|
|
|
|
enum define_coding_undecided_arg_index
|
|
{
|
|
coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max,
|
|
coding_arg_undecided_inhibit_iso_escape_detection,
|
|
coding_arg_undecided_prefer_utf_8,
|
|
coding_arg_undecided_max
|
|
};
|
|
|
|
/* Hash table for all coding systems. Keys are coding system symbols
|
|
and values are spec vectors of the corresponding coding system. A
|
|
spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
|
|
vector of attribute of the coding system. ALIASES is a list of
|
|
aliases (symbols) of the coding system. EOL-TYPE is `unix', `dos',
|
|
`mac' or a vector of coding systems (symbols). */
|
|
|
|
extern Lisp_Object Vcoding_system_hash_table;
|
|
|
|
|
|
/* Enumeration of coding system type. */
|
|
|
|
enum coding_system_type
|
|
{
|
|
coding_type_charset,
|
|
coding_type_utf_8,
|
|
coding_type_utf_16,
|
|
coding_type_iso_2022,
|
|
coding_type_emacs_mule,
|
|
coding_type_sjis,
|
|
coding_type_ccl,
|
|
coding_type_raw_text,
|
|
coding_type_undecided,
|
|
coding_type_max
|
|
};
|
|
|
|
|
|
/* Enumeration of end-of-line format type. */
|
|
|
|
enum end_of_line_type
|
|
{
|
|
eol_lf, /* Line-feed only, same as Emacs' internal
|
|
format. */
|
|
eol_crlf, /* Sequence of carriage-return and
|
|
line-feed. */
|
|
eol_cr, /* Carriage-return only. */
|
|
eol_any, /* Accept any of above. Produce line-feed
|
|
only. */
|
|
eol_undecided, /* This value is used to denote that the
|
|
eol-type is not yet undecided. */
|
|
eol_type_max
|
|
};
|
|
|
|
/* Enumeration of index to an attribute vector of a coding system. */
|
|
|
|
enum coding_attr_index
|
|
{
|
|
coding_attr_base_name,
|
|
coding_attr_docstring,
|
|
coding_attr_mnemonic,
|
|
coding_attr_type,
|
|
coding_attr_charset_list,
|
|
coding_attr_ascii_compat,
|
|
coding_attr_decode_tbl,
|
|
coding_attr_encode_tbl,
|
|
coding_attr_trans_tbl,
|
|
coding_attr_post_read,
|
|
coding_attr_pre_write,
|
|
coding_attr_default_char,
|
|
coding_attr_for_unibyte,
|
|
coding_attr_plist,
|
|
|
|
coding_attr_category,
|
|
coding_attr_safe_charsets,
|
|
|
|
/* The followings are extra attributes for each type. */
|
|
coding_attr_charset_valids,
|
|
|
|
coding_attr_ccl_decoder,
|
|
coding_attr_ccl_encoder,
|
|
coding_attr_ccl_valids,
|
|
|
|
coding_attr_iso_initial,
|
|
coding_attr_iso_usage,
|
|
coding_attr_iso_request,
|
|
coding_attr_iso_flags,
|
|
|
|
coding_attr_utf_bom,
|
|
coding_attr_utf_16_endian,
|
|
|
|
coding_attr_emacs_mule_full,
|
|
|
|
coding_attr_undecided_inhibit_null_byte_detection,
|
|
coding_attr_undecided_inhibit_iso_escape_detection,
|
|
coding_attr_undecided_prefer_utf_8,
|
|
|
|
coding_attr_last_index
|
|
};
|
|
|
|
|
|
/* Macros to access an element of an attribute vector. */
|
|
|
|
#define CODING_ATTR_BASE_NAME(attrs) AREF (attrs, coding_attr_base_name)
|
|
#define CODING_ATTR_TYPE(attrs) AREF (attrs, coding_attr_type)
|
|
#define CODING_ATTR_CHARSET_LIST(attrs) AREF (attrs, coding_attr_charset_list)
|
|
#define CODING_ATTR_MNEMONIC(attrs) AREF (attrs, coding_attr_mnemonic)
|
|
#define CODING_ATTR_DOCSTRING(attrs) AREF (attrs, coding_attr_docstring)
|
|
#define CODING_ATTR_ASCII_COMPAT(attrs) AREF (attrs, coding_attr_ascii_compat)
|
|
#define CODING_ATTR_DECODE_TBL(attrs) AREF (attrs, coding_attr_decode_tbl)
|
|
#define CODING_ATTR_ENCODE_TBL(attrs) AREF (attrs, coding_attr_encode_tbl)
|
|
#define CODING_ATTR_TRANS_TBL(attrs) AREF (attrs, coding_attr_trans_tbl)
|
|
#define CODING_ATTR_POST_READ(attrs) AREF (attrs, coding_attr_post_read)
|
|
#define CODING_ATTR_PRE_WRITE(attrs) AREF (attrs, coding_attr_pre_write)
|
|
#define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char)
|
|
#define CODING_ATTR_FOR_UNIBYTE(attrs) AREF (attrs, coding_attr_for_unibyte)
|
|
#define CODING_ATTR_PLIST(attrs) AREF (attrs, coding_attr_plist)
|
|
#define CODING_ATTR_CATEGORY(attrs) AREF (attrs, coding_attr_category)
|
|
#define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets)
|
|
|
|
|
|
/* Return the name of a coding system specified by ID. */
|
|
#define CODING_ID_NAME(id) \
|
|
(HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))
|
|
|
|
/* Return the attribute vector of a coding system specified by ID. */
|
|
|
|
#define CODING_ID_ATTRS(id) \
|
|
(AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))
|
|
|
|
/* Return the list of aliases of a coding system specified by ID. */
|
|
|
|
#define CODING_ID_ALIASES(id) \
|
|
(AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))
|
|
|
|
/* Return the eol-type of a coding system specified by ID. */
|
|
|
|
#define CODING_ID_EOL_TYPE(id) \
|
|
(AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
|
|
|
|
|
|
/* Return the spec vector of CODING_SYSTEM_SYMBOL. */
|
|
|
|
#define CODING_SYSTEM_SPEC(coding_system_symbol) \
|
|
(Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))
|
|
|
|
|
|
/* Return the ID of CODING_SYSTEM_SYMBOL. */
|
|
|
|
#define CODING_SYSTEM_ID(coding_system_symbol) \
|
|
hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
|
|
coding_system_symbol, NULL)
|
|
|
|
/* Return true if CODING_SYSTEM_SYMBOL is a coding system. */
|
|
|
|
#define CODING_SYSTEM_P(coding_system_symbol) \
|
|
(CODING_SYSTEM_ID (coding_system_symbol) >= 0 \
|
|
|| (! NILP (coding_system_symbol) \
|
|
&& ! NILP (Fcoding_system_p (coding_system_symbol))))
|
|
|
|
/* Check if X is a coding system or not. */
|
|
|
|
#define CHECK_CODING_SYSTEM(x) \
|
|
do { \
|
|
if (CODING_SYSTEM_ID (x) < 0 \
|
|
&& NILP (Fcheck_coding_system (x))) \
|
|
wrong_type_argument (Qcoding_system_p, (x)); \
|
|
} while (false)
|
|
|
|
|
|
/* Check if X is a coding system or not. If it is, set SEPC to the
|
|
spec vector of the coding system. */
|
|
|
|
#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
|
|
do { \
|
|
spec = CODING_SYSTEM_SPEC (x); \
|
|
if (NILP (spec)) \
|
|
{ \
|
|
Fcheck_coding_system (x); \
|
|
spec = CODING_SYSTEM_SPEC (x); \
|
|
} \
|
|
if (NILP (spec)) \
|
|
wrong_type_argument (Qcoding_system_p, (x)); \
|
|
} while (false)
|
|
|
|
|
|
/* Check if X is a coding system or not. If it is, set ID to the
|
|
ID of the coding system. */
|
|
|
|
#define CHECK_CODING_SYSTEM_GET_ID(x, id) \
|
|
do \
|
|
{ \
|
|
id = CODING_SYSTEM_ID (x); \
|
|
if (id < 0) \
|
|
{ \
|
|
Fcheck_coding_system (x); \
|
|
id = CODING_SYSTEM_ID (x); \
|
|
} \
|
|
if (id < 0) \
|
|
wrong_type_argument (Qcoding_system_p, (x)); \
|
|
} while (false)
|
|
|
|
|
|
/*** GENERAL section ***/
|
|
|
|
/* Enumeration of result code of code conversion. */
|
|
enum coding_result_code
|
|
{
|
|
CODING_RESULT_SUCCESS,
|
|
CODING_RESULT_INSUFFICIENT_SRC,
|
|
CODING_RESULT_INSUFFICIENT_DST,
|
|
CODING_RESULT_INVALID_SRC,
|
|
CODING_RESULT_INTERRUPT
|
|
};
|
|
|
|
|
|
/* Macros used for the member `mode' of the struct coding_system. */
|
|
|
|
/* If set, the decoding/encoding routines treat the current data as
|
|
the last block of the whole text to be converted, and do the
|
|
appropriate finishing job. */
|
|
#define CODING_MODE_LAST_BLOCK 0x01
|
|
|
|
/* If set, it means that the current source text is in a buffer which
|
|
enables selective display. */
|
|
#define CODING_MODE_SELECTIVE_DISPLAY 0x02
|
|
|
|
/* This flag is used by the decoding/encoding routines on the fly. If
|
|
set, it means that right-to-left text is being processed. */
|
|
#define CODING_MODE_DIRECTION 0x04
|
|
|
|
#define CODING_MODE_FIXED_DESTINATION 0x08
|
|
|
|
/* If set, it means that the encoding routines produces some safe
|
|
ASCII characters (usually '?') for unsupported characters. */
|
|
#define CODING_MODE_SAFE_ENCODING 0x10
|
|
|
|
/* For handling composition sequence. */
|
|
#include "composite.h"
|
|
|
|
enum composition_state
|
|
{
|
|
COMPOSING_NO,
|
|
COMPOSING_CHAR,
|
|
COMPOSING_RULE,
|
|
COMPOSING_COMPONENT_CHAR,
|
|
COMPOSING_COMPONENT_RULE
|
|
};
|
|
|
|
/* Structure for the current composition status. */
|
|
struct composition_status
|
|
{
|
|
enum composition_state state;
|
|
enum composition_method method;
|
|
bool old_form; /* true if pre-21 form */
|
|
int length; /* number of elements produced in charbuf */
|
|
int nchars; /* number of characters composed */
|
|
int ncomps; /* number of composition components */
|
|
/* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
|
|
See the comment in coding.c. */
|
|
int carryover[4 /* annotation header */
|
|
+ MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
|
|
+ 2 /* intermediate -1 -1 */
|
|
+ MAX_COMPOSITION_COMPONENTS /* CHARs */
|
|
];
|
|
};
|
|
|
|
|
|
/* Structure of the field `spec.iso_2022' in the structure
|
|
`coding_system'. */
|
|
struct iso_2022_spec
|
|
{
|
|
/* Bit-wise-or of CODING_ISO_FLAG_XXX. */
|
|
unsigned flags;
|
|
|
|
/* The current graphic register invoked to each graphic plane. */
|
|
int current_invocation[2];
|
|
|
|
/* The current charset designated to each graphic register. The
|
|
value -1 means that not charset is designated, -2 means that
|
|
there was an invalid designation previously. */
|
|
int current_designation[4];
|
|
|
|
/* If positive, we are now scanning CTEXT extended segment. */
|
|
int ctext_extended_segment_len;
|
|
|
|
/* True temporarily only when graphic register 2 or 3 is invoked by
|
|
single-shift while encoding. */
|
|
bool_bf single_shifting : 1;
|
|
|
|
/* True temporarily only when processing at beginning of line. */
|
|
bool_bf bol : 1;
|
|
|
|
/* If true, we are now scanning embedded UTF-8 sequence. */
|
|
bool_bf embedded_utf_8 : 1;
|
|
|
|
/* The current composition. */
|
|
struct composition_status cmp_status;
|
|
};
|
|
|
|
struct emacs_mule_spec
|
|
{
|
|
struct composition_status cmp_status;
|
|
};
|
|
|
|
struct undecided_spec
|
|
{
|
|
/* Inhibit null byte detection. 1 means always inhibit,
|
|
-1 means do not inhibit, 0 means rely on user variable. */
|
|
int inhibit_nbd;
|
|
|
|
/* Inhibit ISO escape detection. -1, 0, 1 as above. */
|
|
int inhibit_ied;
|
|
|
|
/* Prefer UTF-8 when the input could be other encodings. */
|
|
bool prefer_utf_8;
|
|
};
|
|
|
|
enum utf_bom_type
|
|
{
|
|
utf_detect_bom,
|
|
utf_without_bom,
|
|
utf_with_bom
|
|
};
|
|
|
|
enum utf_16_endian_type
|
|
{
|
|
utf_16_big_endian,
|
|
utf_16_little_endian
|
|
};
|
|
|
|
struct utf_16_spec
|
|
{
|
|
enum utf_bom_type bom;
|
|
enum utf_16_endian_type endian;
|
|
int surrogate;
|
|
};
|
|
|
|
struct coding_detection_info
|
|
{
|
|
/* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs. */
|
|
/* Which categories are already checked. */
|
|
int checked;
|
|
/* Which categories are strongly found. */
|
|
int found;
|
|
/* Which categories are rejected. */
|
|
int rejected;
|
|
};
|
|
|
|
|
|
struct coding_system
|
|
{
|
|
/* ID number of the coding system. This is an index to
|
|
Vcoding_system_hash_table. This value is set by
|
|
setup_coding_system. At the early stage of building time, this
|
|
value is -1 in the array coding_categories to indicate that no
|
|
coding-system of that category is yet defined. */
|
|
ptrdiff_t id;
|
|
|
|
/* Flag bits of the coding system. The meaning of each bit is common
|
|
to all types of coding systems. */
|
|
int common_flags;
|
|
|
|
/* Mode bits of the coding system. See the comments of the macros
|
|
CODING_MODE_XXX. */
|
|
unsigned int mode;
|
|
|
|
/* Detailed information specific to each type of coding system. */
|
|
union
|
|
{
|
|
struct iso_2022_spec iso_2022;
|
|
struct ccl_spec *ccl; /* Defined in ccl.h. */
|
|
struct utf_16_spec utf_16;
|
|
enum utf_bom_type utf_8_bom;
|
|
struct emacs_mule_spec emacs_mule;
|
|
struct undecided_spec undecided;
|
|
} spec;
|
|
|
|
int max_charset_id;
|
|
unsigned char *safe_charsets;
|
|
|
|
/* The following two members specify how binary 8-bit code 128..255
|
|
are represented in source and destination text respectively. True
|
|
means they are represented by 2-byte sequence, false means they are
|
|
represented by 1-byte as is (see the comment in character.h). */
|
|
bool_bf src_multibyte : 1;
|
|
bool_bf dst_multibyte : 1;
|
|
|
|
/* How may heading bytes we can skip for decoding. This is set to
|
|
-1 in setup_coding_system, and updated by detect_coding. So,
|
|
when this is equal to the byte length of the text being
|
|
converted, we can skip the actual conversion process except for
|
|
the eol format. */
|
|
ptrdiff_t head_ascii;
|
|
|
|
ptrdiff_t detected_utf8_chars;
|
|
|
|
/* Used internally in coding.c. See the comment of detect_ascii. */
|
|
int eol_seen;
|
|
|
|
/* The following members are set by encoding/decoding routine. */
|
|
ptrdiff_t produced, produced_char, consumed, consumed_char;
|
|
|
|
/* Number of error source data found in a decoding routine. */
|
|
int errors;
|
|
|
|
/* Store the positions of error source data. */
|
|
ptrdiff_t *error_positions;
|
|
|
|
/* Finish status of code conversion. */
|
|
enum coding_result_code result;
|
|
|
|
ptrdiff_t src_pos, src_pos_byte, src_chars, src_bytes;
|
|
Lisp_Object src_object;
|
|
const unsigned char *source;
|
|
|
|
ptrdiff_t dst_pos, dst_pos_byte, dst_bytes;
|
|
Lisp_Object dst_object;
|
|
unsigned char *destination;
|
|
|
|
/* If an element is non-negative, it is a character code.
|
|
|
|
If it is in the range -128..-1, it is a 8-bit character code
|
|
minus 256.
|
|
|
|
If it is less than -128, it specifies the start of an annotation
|
|
chunk. The length of the chunk is -128 minus the value of the
|
|
element. The following elements are OFFSET, ANNOTATION-TYPE, and
|
|
a sequence of actual data for the annotation. OFFSET is a
|
|
character position offset from dst_pos or src_pos,
|
|
ANNOTATION-TYPE specifies the meaning of the annotation and how to
|
|
handle the following data.. */
|
|
int *charbuf;
|
|
int charbuf_size, charbuf_used;
|
|
|
|
/* True if the source of conversion is not in the member
|
|
`charbuf', but at `src_object'. */
|
|
bool_bf chars_at_source : 1;
|
|
|
|
/* Nonzero if the result of conversion is in `destination'
|
|
buffer rather than in `dst_object'. */
|
|
bool_bf raw_destination : 1;
|
|
|
|
/* Set to true if charbuf contains an annotation. */
|
|
bool_bf annotated : 1;
|
|
|
|
unsigned char carryover[64];
|
|
int carryover_bytes;
|
|
|
|
int default_char;
|
|
|
|
bool (*detector) (struct coding_system *, struct coding_detection_info *);
|
|
void (*decoder) (struct coding_system *);
|
|
bool (*encoder) (struct coding_system *);
|
|
};
|
|
|
|
/* Meanings of bits in the member `common_flags' of the structure
|
|
coding_system. The lowest 8 bits are reserved for various kind of
|
|
annotations (currently two of them are used). */
|
|
#define CODING_ANNOTATION_MASK 0x00FF
|
|
#define CODING_ANNOTATE_COMPOSITION_MASK 0x0001
|
|
#define CODING_ANNOTATE_DIRECTION_MASK 0x0002
|
|
#define CODING_ANNOTATE_CHARSET_MASK 0x0003
|
|
#define CODING_FOR_UNIBYTE_MASK 0x0100
|
|
#define CODING_REQUIRE_FLUSHING_MASK 0x0200
|
|
#define CODING_REQUIRE_DECODING_MASK 0x0400
|
|
#define CODING_REQUIRE_ENCODING_MASK 0x0800
|
|
#define CODING_REQUIRE_DETECTION_MASK 0x1000
|
|
#define CODING_RESET_AT_BOL_MASK 0x2000
|
|
|
|
/* Return nonzero if the coding context CODING requires annotation
|
|
handling. */
|
|
#define CODING_REQUIRE_ANNOTATION(coding) \
|
|
((coding)->common_flags & CODING_ANNOTATION_MASK)
|
|
|
|
/* Return nonzero if the coding context CODING prefers decoding into
|
|
unibyte. */
|
|
#define CODING_FOR_UNIBYTE(coding) \
|
|
((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)
|
|
|
|
/* Return nonzero if the coding context CODING requires specific code to be
|
|
attached at the tail of converted text. */
|
|
#define CODING_REQUIRE_FLUSHING(coding) \
|
|
((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
|
|
|
|
/* Return nonzero if the coding context CODING requires code conversion on
|
|
decoding. */
|
|
#define CODING_REQUIRE_DECODING(coding) \
|
|
((coding)->dst_multibyte \
|
|
|| (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
|
|
|
|
|
|
/* Return nonzero if the coding context CODING requires code conversion on
|
|
encoding.
|
|
The non-multibyte part of the condition is to support encoding of
|
|
unibyte strings/buffers generated by string-as-unibyte or
|
|
(set-buffer-multibyte nil) from multibyte strings/buffers. */
|
|
#define CODING_REQUIRE_ENCODING(coding) \
|
|
((coding)->src_multibyte \
|
|
|| (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK \
|
|
|| (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY)
|
|
|
|
|
|
/* Return nonzero if the coding context CODING requires some kind of code
|
|
detection. */
|
|
#define CODING_REQUIRE_DETECTION(coding) \
|
|
((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
|
|
|
|
/* Return nonzero if the coding context CODING requires code conversion on
|
|
decoding or some kind of code detection. */
|
|
#define CODING_MAY_REQUIRE_DECODING(coding) \
|
|
(CODING_REQUIRE_DECODING (coding) \
|
|
|| CODING_REQUIRE_DETECTION (coding))
|
|
|
|
/* Macros to decode or encode a character of JISX0208 in SJIS. S1 and
|
|
S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
|
|
system. C1 and C2 are the 1st and 2nd position codes of Emacs'
|
|
internal format. */
|
|
|
|
#define SJIS_TO_JIS(code) \
|
|
do { \
|
|
int s1, s2, j1, j2; \
|
|
\
|
|
s1 = (code) >> 8, s2 = (code) & 0xFF; \
|
|
\
|
|
if (s2 >= 0x9F) \
|
|
(j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \
|
|
j2 = s2 - 0x7E); \
|
|
else \
|
|
(j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \
|
|
j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F)); \
|
|
(code) = (j1 << 8) | j2; \
|
|
} while (false)
|
|
|
|
#define SJIS_TO_JIS2(code) \
|
|
do { \
|
|
int s1, s2, j1, j2; \
|
|
\
|
|
s1 = (code) >> 8, s2 = (code) & 0xFF; \
|
|
\
|
|
if (s2 >= 0x9F) \
|
|
{ \
|
|
j1 = (s1 == 0xF0 ? 0x28 \
|
|
: s1 == 0xF1 ? 0x24 \
|
|
: s1 == 0xF2 ? 0x2C \
|
|
: s1 == 0xF3 ? 0x2E \
|
|
: 0x6E + (s1 - 0xF4) * 2); \
|
|
j2 = s2 - 0x7E; \
|
|
} \
|
|
else \
|
|
{ \
|
|
j1 = (s1 <= 0xF2 ? 0x21 + (s1 - 0xF0) * 2 \
|
|
: s1 <= 0xF4 ? 0x2D + (s1 - 0xF3) * 2 \
|
|
: 0x6F + (s1 - 0xF5) * 2); \
|
|
j2 = s2 - ((s2 >= 0x7F ? 0x20 : 0x1F)); \
|
|
} \
|
|
(code) = (j1 << 8) | j2; \
|
|
} while (false)
|
|
|
|
|
|
#define JIS_TO_SJIS(code) \
|
|
do { \
|
|
int s1, s2, j1, j2; \
|
|
\
|
|
j1 = (code) >> 8, j2 = (code) & 0xFF; \
|
|
if (j1 & 1) \
|
|
(s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1), \
|
|
s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F)); \
|
|
else \
|
|
(s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0), \
|
|
s2 = j2 + 0x7E); \
|
|
(code) = (s1 << 8) | s2; \
|
|
} while (false)
|
|
|
|
#define JIS_TO_SJIS2(code) \
|
|
do { \
|
|
int s1, s2, j1, j2; \
|
|
\
|
|
j1 = (code) >> 8, j2 = (code) & 0xFF; \
|
|
if (j1 & 1) \
|
|
{ \
|
|
s1 = (j1 <= 0x25 ? 0xF0 + (j1 - 0x21) / 2 \
|
|
: j1 <= 0x2F ? 0xF3 + (j1 - 0x2D) / 2 \
|
|
: 0xF5 + (j1 - 0x6F) / 2); \
|
|
s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F); \
|
|
} \
|
|
else \
|
|
{ \
|
|
s1 = (j1 == 0x28 ? 0xF0 \
|
|
: j1 == 0x24 ? 0xF1 \
|
|
: j1 == 0x2C ? 0xF2 \
|
|
: j1 == 0x2E ? 0xF3 \
|
|
: 0xF4 + (j1 - 0x6E) / 2); \
|
|
s2 = j2 + 0x7E; \
|
|
} \
|
|
(code) = (s1 << 8) | s2; \
|
|
} while (false)
|
|
|
|
/* Encode the file name NAME using the specified coding system
|
|
for file names, if any. */
|
|
#define ENCODE_FILE(NAME) encode_file_name (NAME)
|
|
|
|
/* Decode the file name NAME using the specified coding system
|
|
for file names, if any. */
|
|
#define DECODE_FILE(NAME) decode_file_name (NAME)
|
|
|
|
/* Encode the string STR using the specified coding system
|
|
for system functions, if any. */
|
|
#define ENCODE_SYSTEM(str) \
|
|
(! NILP (Vlocale_coding_system) \
|
|
? code_convert_string_norecord (str, Vlocale_coding_system, true) \
|
|
: str)
|
|
|
|
/* Decode the string STR using the specified coding system
|
|
for system functions, if any. */
|
|
#define DECODE_SYSTEM(str) \
|
|
(! NILP (Vlocale_coding_system) \
|
|
? code_convert_string_norecord (str, Vlocale_coding_system, false) \
|
|
: str)
|
|
|
|
/* Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op. */
|
|
#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, true)
|
|
|
|
/* Extern declarations. */
|
|
extern Lisp_Object code_conversion_save (bool, bool);
|
|
extern void setup_coding_system (Lisp_Object, struct coding_system *);
|
|
extern Lisp_Object coding_charset_list (struct coding_system *);
|
|
extern Lisp_Object coding_system_charset_list (Lisp_Object);
|
|
extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
|
|
Lisp_Object, bool, bool, bool);
|
|
extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
|
|
bool);
|
|
extern Lisp_Object encode_file_name (Lisp_Object);
|
|
extern Lisp_Object decode_file_name (Lisp_Object);
|
|
extern Lisp_Object raw_text_coding_system (Lisp_Object);
|
|
extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
|
|
extern Lisp_Object complement_process_encoding_system (Lisp_Object);
|
|
|
|
extern void decode_coding_gap (struct coding_system *,
|
|
ptrdiff_t, ptrdiff_t);
|
|
extern void decode_coding_object (struct coding_system *,
|
|
Lisp_Object, ptrdiff_t, ptrdiff_t,
|
|
ptrdiff_t, ptrdiff_t, Lisp_Object);
|
|
extern void encode_coding_object (struct coding_system *,
|
|
Lisp_Object, ptrdiff_t, ptrdiff_t,
|
|
ptrdiff_t, ptrdiff_t, Lisp_Object);
|
|
|
|
#if defined (WINDOWSNT) || defined (CYGWIN)
|
|
|
|
/* These functions use Lisp string objects to store the UTF-16LE
|
|
strings that modern versions of Windows expect. These strings are
|
|
not particularly useful to Lisp, and all Lisp strings should be
|
|
native Emacs multibyte. */
|
|
|
|
/* Access the wide-character string stored in a Lisp string object. */
|
|
#define WCSDATA(x) ((wchar_t *) SDATA (x))
|
|
|
|
/* Convert the multi-byte string in STR to UTF-16LE encoded unibyte
|
|
string, and store it in *BUF. BUF may safely point to STR on entry. */
|
|
extern wchar_t *to_unicode (Lisp_Object str, Lisp_Object *buf);
|
|
|
|
/* Convert STR, a UTF-16LE encoded string embedded in a unibyte string
|
|
object, to a multi-byte Emacs string and return it. This function
|
|
calls code_convert_string_norecord internally and has all its
|
|
failure modes. STR itself is not modified. */
|
|
extern Lisp_Object from_unicode (Lisp_Object str);
|
|
|
|
/* Convert WSTR to an Emacs string. */
|
|
extern Lisp_Object from_unicode_buffer (const wchar_t* wstr);
|
|
|
|
#endif /* WINDOWSNT || CYGWIN */
|
|
|
|
/* Macros for backward compatibility. */
|
|
|
|
#define encode_coding_string(coding, string, nocopy) \
|
|
(STRING_MULTIBYTE(string) ? \
|
|
(encode_coding_object (coding, string, 0, 0, SCHARS (string), \
|
|
SBYTES (string), Qt), \
|
|
(coding)->dst_object) : (string))
|
|
|
|
|
|
#define decode_coding_c_string(coding, src, bytes, dst_object) \
|
|
do { \
|
|
(coding)->source = (src); \
|
|
(coding)->src_chars = (coding)->src_bytes = (bytes); \
|
|
decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \
|
|
(dst_object)); \
|
|
} while (false)
|
|
|
|
|
|
extern Lisp_Object preferred_coding_system (void);
|
|
|
|
|
|
extern Lisp_Object Qutf_8, Qutf_8_emacs;
|
|
|
|
extern Lisp_Object Qcoding_category_index;
|
|
extern Lisp_Object Qcoding_system_p;
|
|
extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
|
|
extern Lisp_Object Qbuffer_file_coding_system;
|
|
|
|
extern Lisp_Object Qunix, Qdos;
|
|
|
|
extern Lisp_Object Qtranslation_table;
|
|
extern Lisp_Object Qtranslation_table_id;
|
|
|
|
#ifdef emacs
|
|
extern Lisp_Object Qfile_coding_system;
|
|
extern Lisp_Object Qcall_process, Qcall_process_region;
|
|
extern Lisp_Object Qstart_process, Qopen_network_stream;
|
|
extern Lisp_Object Qwrite_region;
|
|
|
|
extern char *emacs_strerror (int);
|
|
|
|
/* Coding system to be used to encode text for terminal display when
|
|
terminal coding system is nil. */
|
|
extern struct coding_system safe_terminal_coding;
|
|
|
|
#endif
|
|
|
|
/* Error signaled when there's a problem with detecting coding system */
|
|
extern Lisp_Object Qcoding_system_error;
|
|
|
|
extern char emacs_mule_bytes[256];
|
|
|
|
#endif /* EMACS_CODING_H */
|