mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2026-03-07 12:50:34 -08:00
Embed the Unicode database in the C library as a sequence of C arrays.
This commit is contained in:
parent
3771cb64c6
commit
a136b663bd
6 changed files with 20 additions and 90 deletions
|
|
@ -69,8 +69,6 @@ ecl/external.h: c/ecl/external.h c/ecl/external.h
|
|||
cp -rf $(srcdir)/h/*.h $(srcdir)/h/impl ecl/
|
||||
|
||||
bin/ecl$(EXE): ecl_min$(EXE) compile.lsp ecl/external.h build-stamp $(top_srcdir)/lsp/*.lsp
|
||||
cp $(top_srcdir)/../contrib/unicode/ucd.dat .
|
||||
cp $(top_srcdir)/../contrib/unicode/ucd16.dat .
|
||||
if [ -f CROSS-COMPILER ]; then \
|
||||
./CROSS-COMPILER compile; \
|
||||
else \
|
||||
|
|
@ -144,8 +142,6 @@ install:
|
|||
for i in $(TARGETS); do \
|
||||
$(INSTALL_PROGRAM) $$i $(DESTDIR)$(bindir); \
|
||||
done
|
||||
$(INSTALL_DATA) ucd.dat $(DESTDIR)$(ecldir)/
|
||||
$(INSTALL_DATA) ucd16.dat $(DESTDIR)$(ecldir)/
|
||||
if [ -d encodings ]; then \
|
||||
$(mkinstalldirs) $(DESTDIR)$(ecldir)/encodings; \
|
||||
for i in ./encodings/*; do \
|
||||
|
|
|
|||
|
|
@ -67,16 +67,20 @@ ecl_char_downcase(ecl_character code)
|
|||
|
||||
#else /* ECL_UNICODE */
|
||||
|
||||
extern const unsigned char ecl_ucd_misc_table[];
|
||||
extern const unsigned char *ecl_ucd_page_table[];
|
||||
extern const unsigned char ecl_ucd_page_table_1[];
|
||||
|
||||
/*
|
||||
* 21-bit Unicode (char codes from 0 up to, but not including, #x110000)
|
||||
*/
|
||||
|
||||
#if ECL_UNICODE > 16
|
||||
static uint8_t *
|
||||
const unsigned char *
|
||||
ucd_char_data(ecl_character code)
|
||||
{
|
||||
unsigned char page = cl_core.ucd_pages[code >> 8];
|
||||
return cl_core.ucd_data + ((cl_index)page << 10) + 4 * (code & 0xFF);
|
||||
const unsigned char *page = ecl_ucd_page_table[code >> 8];
|
||||
return page + (4 * (code & 0xFF));
|
||||
}
|
||||
|
||||
static cl_index
|
||||
|
|
@ -85,7 +89,7 @@ ucd_value_0(ecl_character code)
|
|||
return ucd_char_data(code)[0];
|
||||
}
|
||||
|
||||
#define read_case_bytes(c) (c[0] + (c[1] << 8) + (c[3] << 16))
|
||||
#define read_case_bytes(c) (c[1] + (c[2] << 8) + (c[3] << 16))
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
|
@ -94,11 +98,11 @@ ucd_value_0(ecl_character code)
|
|||
*/
|
||||
|
||||
#if ECL_UNICODE <= 16
|
||||
static uint8_t *
|
||||
const unsigned char *
|
||||
ucd_char_data(ecl_character code)
|
||||
{
|
||||
unsigned char page = cl_core.ucd_pages[code >> 8];
|
||||
return cl_core.ucd_data + ((cl_index)page * (256 * 3)) + 3 * (code & 0xFF);
|
||||
const unsigned char *page = ecl_ucd_page_table[code >> 8];
|
||||
return page + (3 * (code & 0xFF));
|
||||
}
|
||||
|
||||
static cl_index
|
||||
|
|
@ -107,19 +111,19 @@ ucd_value_0(ecl_character code)
|
|||
return ucd_char_data(code)[0];
|
||||
}
|
||||
|
||||
#define read_case_bytes(c) (c[0] + (c[1] << 8))
|
||||
#define read_case_bytes(c) (c[1] + (c[2] << 8))
|
||||
#endif
|
||||
|
||||
static int
|
||||
ucd_general_category(ecl_character code)
|
||||
{
|
||||
return cl_core.ucd_misc[8 * ucd_value_0(code)];
|
||||
return ecl_ucd_misc_table[8 * ucd_value_0(code)];
|
||||
}
|
||||
|
||||
static int
|
||||
ucd_decimal_digit(ecl_character code)
|
||||
{
|
||||
return cl_core.ucd_misc[3 + 8 * ucd_value_0(code)];
|
||||
return ecl_ucd_misc_table[3 + 8 * ucd_value_0(code)];
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -163,9 +167,8 @@ ecl_alphanumericp(ecl_character i)
|
|||
ecl_character
|
||||
ecl_char_upcase(ecl_character code)
|
||||
{
|
||||
uint8_t *c = ucd_char_data(code);
|
||||
const unsigned char *c = ucd_char_data(code);
|
||||
if (c[0] == 1) {
|
||||
c++;
|
||||
return read_case_bytes(c);
|
||||
} else {
|
||||
return code;
|
||||
|
|
@ -175,9 +178,8 @@ ecl_char_upcase(ecl_character code)
|
|||
ecl_character
|
||||
ecl_char_downcase(ecl_character code)
|
||||
{
|
||||
uint8_t *c = ucd_char_data(code);
|
||||
const unsigned char *c = ucd_char_data(code);
|
||||
if (c[0] == 0) {
|
||||
c++;
|
||||
return read_case_bytes(c);
|
||||
} else {
|
||||
return code;
|
||||
|
|
|
|||
66
src/c/main.d
66
src/c/main.d
|
|
@ -262,61 +262,6 @@ cl_shutdown(void)
|
|||
ecl_set_option(ECL_OPT_BOOTED, -1);
|
||||
}
|
||||
|
||||
#ifdef ECL_UNICODE
|
||||
static void
|
||||
read_char_database()
|
||||
{
|
||||
#if ECL_UNICODE > 16
|
||||
#define UCD "ucd.dat"
|
||||
#else
|
||||
#define UCD "ucd16.dat"
|
||||
#endif
|
||||
cl_object s = si_base_string_concatenate(2,
|
||||
si_get_library_pathname(),
|
||||
make_constant_base_string(UCD));
|
||||
cl_object output = Cnil;
|
||||
FILE *f = fopen((char *)s->base_string.self, "rb");
|
||||
printf("%s\n", UCD);
|
||||
if (f) {
|
||||
cl_index size, read;
|
||||
if (!fseek(f, 0, SEEK_END)) {
|
||||
size = ftell(f);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
output = ecl_alloc_simple_vector(size, aet_b8);
|
||||
read = 0;
|
||||
while (read < size) {
|
||||
cl_index res;
|
||||
res = fread(output->vector.self.b8 + read, 1, size - read, f);
|
||||
if (res > 0) {
|
||||
read += res;
|
||||
} else {
|
||||
output = Cnil;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
if (output == Cnil) {
|
||||
printf("Unable to read Unicode database: %s\n", s->base_string.self);
|
||||
abort();
|
||||
} else {
|
||||
uint8_t *p = output->vector.self.b8;
|
||||
cl_core.unicode_database = output;
|
||||
cl_core.ucd_misc = p + 2;
|
||||
cl_core.ucd_pages = cl_core.ucd_misc + (p[0] + (p[1]<<8));
|
||||
#if ECL_UNICODE > 16
|
||||
cl_core.ucd_data = cl_core.ucd_pages + (0x110000 / 256);
|
||||
#else
|
||||
cl_core.ucd_data = cl_core.ucd_pages + (65536 / 256);
|
||||
#endif
|
||||
}
|
||||
ECL_SET(@'si::+unicode-database+', output);
|
||||
}
|
||||
#else
|
||||
#define read_char_database() (void)0
|
||||
#endif
|
||||
|
||||
ecl_def_ct_single_float(default_rehash_size,1.5f,static,const);
|
||||
ecl_def_ct_single_float(default_rehash_threshold,0.75f,static,const);
|
||||
ecl_def_ct_base_string(str_common_lisp,"COMMON-LISP",11,static,const);
|
||||
|
|
@ -456,12 +401,6 @@ struct cl_core_struct cl_core = {
|
|||
#endif
|
||||
Cnil, /* signal_queue */
|
||||
|
||||
#ifdef ECL_UNICODE
|
||||
Cnil, /* unicode_database */
|
||||
NULL, /* ucd_misc */
|
||||
NULL, /* ucd_pages */
|
||||
NULL, /* ucd_data */
|
||||
#endif
|
||||
NULL, /* default_sigmask */
|
||||
|
||||
#ifdef ECL_THREADS
|
||||
|
|
@ -629,11 +568,6 @@ cl_boot(int argc, char **argv)
|
|||
ECL_SET(@'mp::*current-process*', env->own_process);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Initialize Unicode character database.
|
||||
*/
|
||||
read_char_database();
|
||||
|
||||
/*
|
||||
* Load character names. The following hash table is a map
|
||||
* from names to character codes and vice versa. Note that we
|
||||
|
|
|
|||
2
src/configure
vendored
2
src/configure
vendored
|
|
@ -15257,6 +15257,7 @@ _ACEOF
|
|||
|
||||
CHAR_CODE_LIMIT=65536
|
||||
ECL_CHARACTER=$ECL_INT16_T
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd16.o unicode/ucd16-0000.o unicode/ucd16-0016.o unicode/ucd16-0032.o unicode/ucd16-0048.o unicode/ucd16-0064.o"
|
||||
else
|
||||
|
||||
cat >>confdefs.h <<\_ACEOF
|
||||
|
|
@ -15265,6 +15266,7 @@ _ACEOF
|
|||
|
||||
CHAR_CODE_LIMIT=1114112
|
||||
ECL_CHARACTER=$ECL_INT32_T
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd.o unicode/ucd-0000.o unicode/ucd-0016.o unicode/ucd-0032.o unicode/ucd-0048.o unicode/ucd-0064.o unicode/ucd-0080.o unicode/ucd-0096.o"
|
||||
fi
|
||||
else
|
||||
CHAR_CODE_LIMIT=256
|
||||
|
|
|
|||
|
|
@ -871,10 +871,12 @@ if test "x${enable_unicode}" != "xno"; then
|
|||
AC_DEFINE(ECL_UNICODE, [16], [Support for Unicode])
|
||||
CHAR_CODE_LIMIT=65536
|
||||
ECL_CHARACTER=$ECL_INT16_T
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd16.o unicode/ucd16-0000.o unicode/ucd16-0016.o unicode/ucd16-0032.o unicode/ucd16-0048.o unicode/ucd16-0064.o"
|
||||
else
|
||||
AC_DEFINE(ECL_UNICODE, [21], [Support for Unicode])
|
||||
CHAR_CODE_LIMIT=1114112
|
||||
ECL_CHARACTER=$ECL_INT32_T
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd.o unicode/ucd-0000.o unicode/ucd-0016.o unicode/ucd-0032.o unicode/ucd-0048.o unicode/ucd-0064.o unicode/ucd-0080.o unicode/ucd-0096.o"
|
||||
fi
|
||||
else
|
||||
CHAR_CODE_LIMIT=256
|
||||
|
|
|
|||
|
|
@ -231,12 +231,6 @@ struct cl_core_struct {
|
|||
#endif
|
||||
cl_object signal_queue;
|
||||
|
||||
#ifdef ECL_UNICODE
|
||||
cl_object unicode_database;
|
||||
uint8_t *ucd_misc;
|
||||
uint8_t *ucd_pages;
|
||||
uint8_t *ucd_data;
|
||||
#endif
|
||||
void *default_sigmask;
|
||||
|
||||
#ifdef ECL_THREADS
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue