Embed the Unicode database in the C library as a sequence of C arrays.

This commit is contained in:
Juan Jose Garcia Ripoll 2011-02-20 23:52:24 +00:00
parent 3771cb64c6
commit a136b663bd
6 changed files with 20 additions and 90 deletions

View file

@ -69,8 +69,6 @@ ecl/external.h: c/ecl/external.h c/ecl/external.h
cp -rf $(srcdir)/h/*.h $(srcdir)/h/impl ecl/
bin/ecl$(EXE): ecl_min$(EXE) compile.lsp ecl/external.h build-stamp $(top_srcdir)/lsp/*.lsp
cp $(top_srcdir)/../contrib/unicode/ucd.dat .
cp $(top_srcdir)/../contrib/unicode/ucd16.dat .
if [ -f CROSS-COMPILER ]; then \
./CROSS-COMPILER compile; \
else \
@ -144,8 +142,6 @@ install:
for i in $(TARGETS); do \
$(INSTALL_PROGRAM) $$i $(DESTDIR)$(bindir); \
done
$(INSTALL_DATA) ucd.dat $(DESTDIR)$(ecldir)/
$(INSTALL_DATA) ucd16.dat $(DESTDIR)$(ecldir)/
if [ -d encodings ]; then \
$(mkinstalldirs) $(DESTDIR)$(ecldir)/encodings; \
for i in ./encodings/*; do \

View file

@ -67,16 +67,20 @@ ecl_char_downcase(ecl_character code)
#else /* ECL_UNICODE */
extern const unsigned char ecl_ucd_misc_table[];
extern const unsigned char *ecl_ucd_page_table[];
extern const unsigned char ecl_ucd_page_table_1[];
/*
* 21-bit Unicode (char codes from 0 up to, but not including, #x110000)
*/
#if ECL_UNICODE > 16
static uint8_t *
const unsigned char *
ucd_char_data(ecl_character code)
{
unsigned char page = cl_core.ucd_pages[code >> 8];
return cl_core.ucd_data + ((cl_index)page << 10) + 4 * (code & 0xFF);
const unsigned char *page = ecl_ucd_page_table[code >> 8];
return page + (4 * (code & 0xFF));
}
static cl_index
@ -85,7 +89,7 @@ ucd_value_0(ecl_character code)
return ucd_char_data(code)[0];
}
#define read_case_bytes(c) (c[0] + (c[1] << 8) + (c[3] << 16))
#define read_case_bytes(c) (c[1] + (c[2] << 8) + (c[3] << 16))
#endif
/*
@ -94,11 +98,11 @@ ucd_value_0(ecl_character code)
*/
#if ECL_UNICODE <= 16
static uint8_t *
const unsigned char *
ucd_char_data(ecl_character code)
{
unsigned char page = cl_core.ucd_pages[code >> 8];
return cl_core.ucd_data + ((cl_index)page * (256 * 3)) + 3 * (code & 0xFF);
const unsigned char *page = ecl_ucd_page_table[code >> 8];
return page + (3 * (code & 0xFF));
}
static cl_index
@ -107,19 +111,19 @@ ucd_value_0(ecl_character code)
return ucd_char_data(code)[0];
}
#define read_case_bytes(c) (c[0] + (c[1] << 8))
#define read_case_bytes(c) (c[1] + (c[2] << 8))
#endif
static int
ucd_general_category(ecl_character code)
{
return cl_core.ucd_misc[8 * ucd_value_0(code)];
return ecl_ucd_misc_table[8 * ucd_value_0(code)];
}
static int
ucd_decimal_digit(ecl_character code)
{
return cl_core.ucd_misc[3 + 8 * ucd_value_0(code)];
return ecl_ucd_misc_table[3 + 8 * ucd_value_0(code)];
}
bool
@ -163,9 +167,8 @@ ecl_alphanumericp(ecl_character i)
ecl_character
ecl_char_upcase(ecl_character code)
{
uint8_t *c = ucd_char_data(code);
const unsigned char *c = ucd_char_data(code);
if (c[0] == 1) {
c++;
return read_case_bytes(c);
} else {
return code;
@ -175,9 +178,8 @@ ecl_char_upcase(ecl_character code)
ecl_character
ecl_char_downcase(ecl_character code)
{
uint8_t *c = ucd_char_data(code);
const unsigned char *c = ucd_char_data(code);
if (c[0] == 0) {
c++;
return read_case_bytes(c);
} else {
return code;

View file

@ -262,61 +262,6 @@ cl_shutdown(void)
ecl_set_option(ECL_OPT_BOOTED, -1);
}
#ifdef ECL_UNICODE
static void
read_char_database()
{
#if ECL_UNICODE > 16
#define UCD "ucd.dat"
#else
#define UCD "ucd16.dat"
#endif
cl_object s = si_base_string_concatenate(2,
si_get_library_pathname(),
make_constant_base_string(UCD));
cl_object output = Cnil;
FILE *f = fopen((char *)s->base_string.self, "rb");
printf("%s\n", UCD);
if (f) {
cl_index size, read;
if (!fseek(f, 0, SEEK_END)) {
size = ftell(f);
fseek(f, 0, SEEK_SET);
output = ecl_alloc_simple_vector(size, aet_b8);
read = 0;
while (read < size) {
cl_index res;
res = fread(output->vector.self.b8 + read, 1, size - read, f);
if (res > 0) {
read += res;
} else {
output = Cnil;
break;
}
}
}
fclose(f);
}
if (output == Cnil) {
printf("Unable to read Unicode database: %s\n", s->base_string.self);
abort();
} else {
uint8_t *p = output->vector.self.b8;
cl_core.unicode_database = output;
cl_core.ucd_misc = p + 2;
cl_core.ucd_pages = cl_core.ucd_misc + (p[0] + (p[1]<<8));
#if ECL_UNICODE > 16
cl_core.ucd_data = cl_core.ucd_pages + (0x110000 / 256);
#else
cl_core.ucd_data = cl_core.ucd_pages + (65536 / 256);
#endif
}
ECL_SET(@'si::+unicode-database+', output);
}
#else
#define read_char_database() (void)0
#endif
ecl_def_ct_single_float(default_rehash_size,1.5f,static,const);
ecl_def_ct_single_float(default_rehash_threshold,0.75f,static,const);
ecl_def_ct_base_string(str_common_lisp,"COMMON-LISP",11,static,const);
@ -456,12 +401,6 @@ struct cl_core_struct cl_core = {
#endif
Cnil, /* signal_queue */
#ifdef ECL_UNICODE
Cnil, /* unicode_database */
NULL, /* ucd_misc */
NULL, /* ucd_pages */
NULL, /* ucd_data */
#endif
NULL, /* default_sigmask */
#ifdef ECL_THREADS
@ -629,11 +568,6 @@ cl_boot(int argc, char **argv)
ECL_SET(@'mp::*current-process*', env->own_process);
#endif
/*
* Initialize Unicode character database.
*/
read_char_database();
/*
* Load character names. The following hash table is a map
* from names to character codes and vice versa. Note that we

2
src/configure vendored
View file

@ -15257,6 +15257,7 @@ _ACEOF
CHAR_CODE_LIMIT=65536
ECL_CHARACTER=$ECL_INT16_T
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd16.o unicode/ucd16-0000.o unicode/ucd16-0016.o unicode/ucd16-0032.o unicode/ucd16-0048.o unicode/ucd16-0064.o"
else
cat >>confdefs.h <<\_ACEOF
@ -15265,6 +15266,7 @@ _ACEOF
CHAR_CODE_LIMIT=1114112
ECL_CHARACTER=$ECL_INT32_T
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd.o unicode/ucd-0000.o unicode/ucd-0016.o unicode/ucd-0032.o unicode/ucd-0048.o unicode/ucd-0064.o unicode/ucd-0080.o unicode/ucd-0096.o"
fi
else
CHAR_CODE_LIMIT=256

View file

@ -871,10 +871,12 @@ if test "x${enable_unicode}" != "xno"; then
AC_DEFINE(ECL_UNICODE, [16], [Support for Unicode])
CHAR_CODE_LIMIT=65536
ECL_CHARACTER=$ECL_INT16_T
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd16.o unicode/ucd16-0000.o unicode/ucd16-0016.o unicode/ucd16-0032.o unicode/ucd16-0048.o unicode/ucd16-0064.o"
else
AC_DEFINE(ECL_UNICODE, [21], [Support for Unicode])
CHAR_CODE_LIMIT=1114112
ECL_CHARACTER=$ECL_INT32_T
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd.o unicode/ucd-0000.o unicode/ucd-0016.o unicode/ucd-0032.o unicode/ucd-0048.o unicode/ucd-0064.o unicode/ucd-0080.o unicode/ucd-0096.o"
fi
else
CHAR_CODE_LIMIT=256

View file

@ -231,12 +231,6 @@ struct cl_core_struct {
#endif
cl_object signal_queue;
#ifdef ECL_UNICODE
cl_object unicode_database;
uint8_t *ucd_misc;
uint8_t *ucd_pages;
uint8_t *ucd_data;
#endif
void *default_sigmask;
#ifdef ECL_THREADS