mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2026-03-13 08:20:31 -07:00
Merge branch 'string-extensions' into 'develop'
Extend API for converting strings to and from different encodings See merge request embeddable-common-lisp/ecl!257
This commit is contained in:
commit
89fd8c53f4
13 changed files with 599 additions and 28 deletions
238
src/c/file.d
238
src/c/file.d
|
|
@ -4457,6 +4457,77 @@ seq_in_unread_char(cl_object strm, ecl_character c)
|
|||
strm->stream.byte_stack = ECL_NIL;
|
||||
}
|
||||
|
||||
#ifdef ecl_uint16_t
|
||||
static ecl_character
|
||||
seq_in_ucs2_read_char(cl_object strm)
|
||||
{
|
||||
cl_fixnum curr_pos = SEQ_INPUT_POSITION(strm);
|
||||
cl_fixnum last = SEQ_INPUT_LIMIT(strm);
|
||||
if (curr_pos >= last) {
|
||||
return EOF;
|
||||
}
|
||||
cl_object vector = SEQ_INPUT_VECTOR(strm);
|
||||
ecl_character c = vector->vector.self.b16[curr_pos++];
|
||||
cl_object err;
|
||||
if (c >= 0xD800 && c <= 0xDBFF) {
|
||||
if (curr_pos >= last) {
|
||||
err = ecl_list1(ecl_make_fixnum(c));
|
||||
goto DECODING_ERROR;
|
||||
}
|
||||
ecl_character aux = vector->vector.self.b16[curr_pos++];
|
||||
if (aux < 0xDC00 || aux > 0xDFFF) {
|
||||
err = cl_list(2, ecl_make_fixnum(c), ecl_make_fixnum(aux));
|
||||
goto DECODING_ERROR;
|
||||
}
|
||||
c = ((c & 0x3FFF) << 10) + (aux & 0x3FFF) + 0x10000;
|
||||
}
|
||||
SEQ_INPUT_POSITION(strm) = curr_pos;
|
||||
return c;
|
||||
cl_object code;
|
||||
DECODING_ERROR:
|
||||
code = _ecl_funcall4(@'ext::decoding-error', strm,
|
||||
cl_stream_external_format(strm),
|
||||
err);
|
||||
if (Null(code)) {
|
||||
/* Go for next character */
|
||||
return seq_in_ucs2_read_char(strm);
|
||||
} else {
|
||||
/* Return supplied character */
|
||||
return ecl_char_code(code);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
seq_in_ucs2_unread_char(cl_object strm, ecl_character c)
|
||||
{
|
||||
if (c >= 0x10000) {
|
||||
SEQ_INPUT_POSITION(strm) -= 2;
|
||||
} else {
|
||||
SEQ_INPUT_POSITION(strm) -= 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ecl_uint32_t
|
||||
static ecl_character
|
||||
seq_in_ucs4_read_char(cl_object strm)
|
||||
{
|
||||
cl_fixnum curr_pos = SEQ_INPUT_POSITION(strm);
|
||||
if (curr_pos >= SEQ_INPUT_LIMIT(strm)) {
|
||||
return EOF;
|
||||
}
|
||||
cl_object vector = SEQ_INPUT_VECTOR(strm);
|
||||
SEQ_INPUT_POSITION(strm) += 1;
|
||||
return vector->vector.self.b32[curr_pos];
|
||||
}
|
||||
|
||||
static void
|
||||
seq_in_ucs4_unread_char(cl_object strm, ecl_character c)
|
||||
{
|
||||
SEQ_INPUT_POSITION(strm) -= 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
seq_in_listen(cl_object strm)
|
||||
{
|
||||
|
|
@ -4530,11 +4601,10 @@ make_sequence_input_stream(cl_object vector, cl_index istart, cl_index iend,
|
|||
cl_object type_name;
|
||||
int byte_size;
|
||||
int flags = 0;
|
||||
if (!ECL_VECTORP(vector) ||
|
||||
ecl_aet_size[type = ecl_array_elttype(vector)] != 1)
|
||||
{
|
||||
FEerror("MAKE-SEQUENCE-INPUT-STREAM only accepts vectors whose element has a size of 1 byte.~%~A", 1, vector);
|
||||
}
|
||||
if (!ECL_VECTORP(vector)) {
|
||||
FEwrong_type_nth_arg(@[ext::make-sequence-input-stream], 1, vector, @[vector]);
|
||||
}
|
||||
type = ecl_array_elttype(vector);
|
||||
type_name = ecl_elttype_to_symbol(type);
|
||||
byte_size = ecl_normalize_stream_element_type(type_name);
|
||||
/* Character streams always get some external format. For binary
|
||||
|
|
@ -4545,9 +4615,32 @@ make_sequence_input_stream(cl_object vector, cl_index istart, cl_index iend,
|
|||
if (!byte_size && Null(external_format)) {
|
||||
external_format = @':default';
|
||||
}
|
||||
set_stream_elt_type(strm, byte_size, flags, external_format);
|
||||
/* Override byte size */
|
||||
if (byte_size) strm->stream.byte_size = 8;
|
||||
if (ecl_aet_size[type] == 1) {
|
||||
set_stream_elt_type(strm, byte_size, flags, external_format);
|
||||
/* Override byte size */
|
||||
if (byte_size) strm->stream.byte_size = 8;
|
||||
}
|
||||
#ifdef ecl_uint16_t
|
||||
else if (ecl_aet_size[type] == 2 && external_format == @':ucs-2') {
|
||||
IO_STREAM_ELT_TYPE(strm) = @'character';
|
||||
strm->stream.format = @':ucs-2';
|
||||
strm->stream.byte_size = 2*8;
|
||||
strm->stream.ops->read_char = seq_in_ucs2_read_char;
|
||||
strm->stream.ops->unread_char = seq_in_ucs2_unread_char;
|
||||
}
|
||||
#endif
|
||||
#ifdef ecl_uint32_t
|
||||
else if (ecl_aet_size[type] == 4 && external_format == @':ucs-4') {
|
||||
IO_STREAM_ELT_TYPE(strm) = @'character';
|
||||
strm->stream.format = @':ucs-4';
|
||||
strm->stream.byte_size = 4*8;
|
||||
strm->stream.ops->read_char = seq_in_ucs4_read_char;
|
||||
strm->stream.ops->unread_char = seq_in_ucs4_unread_char;
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
FEerror("Illegal combination of external-format ~A and input vector ~A for MAKE-SEQUENCE-INPUT-STREAM.~%", 2, external_format, vector);
|
||||
}
|
||||
SEQ_INPUT_VECTOR(strm) = vector;
|
||||
SEQ_INPUT_POSITION(strm) = istart;
|
||||
SEQ_INPUT_LIMIT(strm) = iend;
|
||||
|
|
@ -4570,6 +4663,18 @@ make_sequence_input_stream(cl_object vector, cl_index istart, cl_index iend,
|
|||
* SEQUENCE OUTPUT STREAMS
|
||||
*/
|
||||
|
||||
static void
|
||||
seq_out_enlarge_vector(cl_object strm)
|
||||
{
|
||||
cl_object vector = SEQ_OUTPUT_VECTOR(strm);
|
||||
if (!ECL_ADJUSTABLE_ARRAY_P(vector)) {
|
||||
FEerror("Can't adjust the dimensions of the sequence of sequence stream ~A", 1, strm);
|
||||
}
|
||||
vector = _ecl_funcall3(@'adjust-array', vector,
|
||||
ecl_ash(ecl_make_fixnum(vector->vector.dim), 1));
|
||||
SEQ_OUTPUT_VECTOR(strm) = vector;
|
||||
}
|
||||
|
||||
static cl_index
|
||||
seq_out_write_byte8(cl_object strm, unsigned char *c, cl_index n)
|
||||
{
|
||||
|
|
@ -4580,13 +4685,7 @@ seq_out_write_byte8(cl_object strm, unsigned char *c, cl_index n)
|
|||
cl_fixnum last = vector->vector.dim;
|
||||
cl_fixnum delta = last - curr_pos;
|
||||
if (delta < n) {
|
||||
/* Not enough space, enlarge */
|
||||
if (!ECL_ADJUSTABLE_ARRAY_P(vector)) {
|
||||
FEerror("Can't adjust the dimensions of the sequence of sequence stream ~A", 1, strm);
|
||||
}
|
||||
vector = _ecl_funcall3(@'adjust-array', vector,
|
||||
ecl_ash(ecl_make_fixnum(last), 1));
|
||||
SEQ_OUTPUT_VECTOR(strm) = vector;
|
||||
seq_out_enlarge_vector(strm);
|
||||
goto AGAIN;
|
||||
}
|
||||
memcpy(vector->vector.self.bc + curr_pos, c, n);
|
||||
|
|
@ -4597,6 +4696,55 @@ seq_out_write_byte8(cl_object strm, unsigned char *c, cl_index n)
|
|||
return n;
|
||||
}
|
||||
|
||||
#ifdef ecl_uint16_t
|
||||
static ecl_character
|
||||
seq_out_ucs2_write_char(cl_object strm, ecl_character c)
|
||||
{
|
||||
AGAIN:
|
||||
{
|
||||
cl_object vector = SEQ_OUTPUT_VECTOR(strm);
|
||||
cl_fixnum curr_pos = SEQ_OUTPUT_POSITION(strm);
|
||||
cl_fixnum n = (c >= 0x10000) ? 2 : 1;
|
||||
if (vector->vector.dim - curr_pos < n) {
|
||||
seq_out_enlarge_vector(strm);
|
||||
goto AGAIN;
|
||||
}
|
||||
if (c >= 0x10000) {
|
||||
c -= 0x10000;
|
||||
vector->vector.self.b16[curr_pos++] = (c >> 10) | 0xD800;
|
||||
vector->vector.self.b16[curr_pos++] = (c & 0x3FFF) | 0xDC00;
|
||||
} else {
|
||||
vector->vector.self.b16[curr_pos++] = c;
|
||||
}
|
||||
SEQ_OUTPUT_POSITION(strm) = curr_pos;
|
||||
if (vector->vector.fillp < curr_pos)
|
||||
vector->vector.fillp = curr_pos;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ecl_uint32_t
|
||||
static ecl_character
|
||||
seq_out_ucs4_write_char(cl_object strm, ecl_character c)
|
||||
{
|
||||
AGAIN:
|
||||
{
|
||||
cl_object vector = SEQ_OUTPUT_VECTOR(strm);
|
||||
cl_fixnum curr_pos = SEQ_OUTPUT_POSITION(strm);
|
||||
if (vector->vector.dim - curr_pos < 1) {
|
||||
seq_out_enlarge_vector(strm);
|
||||
goto AGAIN;
|
||||
}
|
||||
vector->vector.self.b32[curr_pos++] = c;
|
||||
SEQ_OUTPUT_POSITION(strm) = curr_pos;
|
||||
if (vector->vector.fillp < curr_pos)
|
||||
vector->vector.fillp = curr_pos;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
#endif
|
||||
|
||||
static cl_object
|
||||
seq_out_get_position(cl_object strm)
|
||||
{
|
||||
|
|
@ -4661,11 +4809,10 @@ make_sequence_output_stream(cl_object vector, cl_object external_format)
|
|||
cl_object type_name;
|
||||
int byte_size;
|
||||
int flags = 0;
|
||||
if (!ECL_VECTORP(vector) ||
|
||||
ecl_aet_size[type = ecl_array_elttype(vector)] != 1)
|
||||
{
|
||||
FEerror("MAKE-SEQUENCE-OUTPUT-STREAM only accepts vectors whose element has a size of 1 byte.~%~A", 1, vector);
|
||||
}
|
||||
if (!ECL_VECTORP(vector)) {
|
||||
FEwrong_type_nth_arg(@[ext::make-sequence-output-stream], 1, vector, @[vector]);
|
||||
}
|
||||
type = ecl_array_elttype(vector);
|
||||
type_name = ecl_elttype_to_symbol(type);
|
||||
byte_size = ecl_normalize_stream_element_type(type_name);
|
||||
/* Character streams always get some external format. For binary
|
||||
|
|
@ -4676,9 +4823,30 @@ make_sequence_output_stream(cl_object vector, cl_object external_format)
|
|||
if (!byte_size && Null(external_format)) {
|
||||
external_format = @':default';
|
||||
}
|
||||
set_stream_elt_type(strm, byte_size, flags, external_format);
|
||||
/* Override byte size */
|
||||
if (byte_size) strm->stream.byte_size = 8;
|
||||
if (ecl_aet_size[type] == 1) {
|
||||
set_stream_elt_type(strm, byte_size, flags, external_format);
|
||||
/* Override byte size */
|
||||
if (byte_size) strm->stream.byte_size = 8;
|
||||
}
|
||||
#ifdef ecl_uint16_t
|
||||
else if (ecl_aet_size[type] == 2 && external_format == @':ucs-2') {
|
||||
IO_STREAM_ELT_TYPE(strm) = @'character';
|
||||
strm->stream.format = @':ucs-2';
|
||||
strm->stream.byte_size = 2*8;
|
||||
strm->stream.ops->write_char = seq_out_ucs2_write_char;
|
||||
}
|
||||
#endif
|
||||
#ifdef ecl_uint32_t
|
||||
else if (ecl_aet_size[type] == 4 && external_format == @':ucs-4') {
|
||||
IO_STREAM_ELT_TYPE(strm) = @'character';
|
||||
strm->stream.format = @':ucs-4';
|
||||
strm->stream.byte_size = 4*8;
|
||||
strm->stream.ops->write_char = seq_out_ucs4_write_char;
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
FEerror("Illegal combination of external-format ~A and output vector ~A for MAKE-SEQUENCE-OUTPUT-STREAM.~%", 2, external_format, vector);
|
||||
}
|
||||
SEQ_OUTPUT_VECTOR(strm) = vector;
|
||||
SEQ_OUTPUT_POSITION(strm) = vector->vector.fillp;
|
||||
return strm;
|
||||
|
|
@ -5203,7 +5371,29 @@ ecl_normalize_stream_element_type(cl_object element_type)
|
|||
return -8;
|
||||
} else if (element_type == @'unsigned-byte' || element_type == @'ext::byte8') {
|
||||
return 8;
|
||||
} else if (element_type == @':default') {
|
||||
}
|
||||
#ifdef ecl_uint16_t
|
||||
else if (element_type == @'ext::integer16') {
|
||||
return -16;
|
||||
} else if (element_type == @'ext::byte16') {
|
||||
return 16;
|
||||
}
|
||||
#endif
|
||||
#ifdef ecl_uint32_t
|
||||
else if (element_type == @'ext::integer32') {
|
||||
return -32;
|
||||
} else if (element_type == @'ext::byte32') {
|
||||
return 32;
|
||||
}
|
||||
#endif
|
||||
#ifdef ecl_uint64_t
|
||||
else if (element_type == @'ext::integer64') {
|
||||
return -64;
|
||||
} else if (element_type == @'ext::byte64') {
|
||||
return 64;
|
||||
}
|
||||
#endif
|
||||
else if (element_type == @':default') {
|
||||
return 0;
|
||||
} else if (element_type == @'base-char' || element_type == @'character') {
|
||||
return 0;
|
||||
|
|
|
|||
173
src/c/string.d
173
src/c/string.d
|
|
@ -70,7 +70,7 @@ do_make_string(cl_index s, ecl_character code)
|
|||
@)
|
||||
|
||||
/*
|
||||
Make a string of a certain size, with some eading zeros to
|
||||
Make a string of a certain size, with some leading zeros to
|
||||
keep C happy. The string must be adjustable, to allow further
|
||||
growth. (See unixfsys.c for its use).
|
||||
*/
|
||||
|
|
@ -895,3 +895,174 @@ nstring_case(cl_narg narg, cl_object fun, ecl_casefun casefun, ecl_va_list ARGS)
|
|||
}
|
||||
@(return output);
|
||||
@)
|
||||
|
||||
@(defun ext::octets-to-string (input &key
|
||||
(external_format @':default')
|
||||
(start ecl_make_fixnum(0))
|
||||
(end ECL_NIL))
|
||||
cl_object output;
|
||||
cl_index input_size;
|
||||
cl_object input_stream;
|
||||
cl_index output_size;
|
||||
cl_object ret;
|
||||
@
|
||||
output = si_get_buffer_string();
|
||||
input_stream = si_make_sequence_input_stream(7, input,
|
||||
@':external-format', external_format,
|
||||
@':start', start,
|
||||
@':end', end);
|
||||
/* INV: MAKE-SEQUENCE-INPUT-STREAM checks types of start and end indices */
|
||||
input_size = (Null(end) ? ecl_length(input) : ecl_fixnum(end)) - ecl_fixnum(start);
|
||||
output_size = 0;
|
||||
do {
|
||||
output->base_string.fillp = output->base_string.dim;
|
||||
output_size += ecl_to_unsigned_integer(si_do_read_sequence(output, input_stream,
|
||||
ecl_make_fixnum(output_size),
|
||||
ecl_make_fixnum(output->base_string.dim)));
|
||||
if (output_size < output->base_string.dim) {
|
||||
break;
|
||||
}
|
||||
output = _ecl_funcall3(@'adjust-array', output,
|
||||
ecl_make_fixnum(input_size > output_size
|
||||
? input_size
|
||||
: output_size + 128));
|
||||
} while (1);
|
||||
output->base_string.fillp = output_size;
|
||||
if (ecl_fits_in_base_string(output)) {
|
||||
ret = si_copy_to_simple_base_string(output);
|
||||
} else {
|
||||
ret = cl_copy_seq(output);
|
||||
}
|
||||
si_put_buffer_string(output);
|
||||
@(return ret);
|
||||
@)
|
||||
|
||||
@(defun ext::string-to-octets (input &key
|
||||
(external_format @':default')
|
||||
(start ecl_make_fixnum(0))
|
||||
(end ECL_NIL)
|
||||
(null_terminate ECL_NIL)
|
||||
(element_type @'ext::byte8'))
|
||||
cl_object output;
|
||||
cl_object output_stream;
|
||||
@
|
||||
output = si_make_vector(element_type, /* element-type */
|
||||
cl_length(input), /* length */
|
||||
ECL_T, /* adjustable */
|
||||
ecl_make_fixnum(0), /* fillp */
|
||||
ECL_NIL, /* displaced */
|
||||
ECL_NIL); /* displaced-offset */
|
||||
output_stream = si_make_sequence_output_stream(3, output,
|
||||
@':external-format', external_format);
|
||||
si_do_write_sequence(input, output_stream, start, end);
|
||||
if (!Null(null_terminate)) {
|
||||
ecl_write_char(0, output_stream);
|
||||
}
|
||||
@(return output);
|
||||
@)
|
||||
|
||||
cl_object
|
||||
ecl_decode_from_cstring(const char *s, cl_fixnum len, cl_object encoding)
|
||||
{
|
||||
volatile cl_object ret;
|
||||
ECL_HANDLER_CASE_BEGIN(ecl_process_env(), ecl_list1(@'ext::character-decoding-error')) {
|
||||
ret = si_octets_to_string(3, ecl_make_constant_base_string(s, len), @':external-format', encoding);
|
||||
} ECL_HANDLER_CASE(1, c) {
|
||||
ret = c; /* suppress "unused variable `c`" warning */
|
||||
ret = OBJNULL;
|
||||
} ECL_HANDLER_CASE_END;
|
||||
return ret;
|
||||
}
|
||||
|
||||
cl_fixnum
|
||||
ecl_encode_to_cstring(char *output, cl_fixnum output_len, cl_object input, cl_object encoding)
|
||||
{
|
||||
volatile cl_fixnum ret;
|
||||
ECL_HANDLER_CASE_BEGIN(ecl_process_env(), ecl_list1(@'ext::character-encoding-error')) {
|
||||
cl_object output_vec = si_string_to_octets(3, input, @':external-format', encoding);
|
||||
ret = output_vec->vector.fillp + 1;
|
||||
if (ret <= output_len) {
|
||||
memcpy(output, output_vec->vector.self.b8, (ret-1)*sizeof(char));
|
||||
output[ret-1] = 0; /* null-terminator */
|
||||
}
|
||||
} ECL_HANDLER_CASE(1, c) {
|
||||
input = c; /* suppress "unused variable `c`" warning */
|
||||
ret = -1;
|
||||
} ECL_HANDLER_CASE_END;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef HAVE_WCHAR_H
|
||||
cl_object
|
||||
ecl_decode_from_unicode_wstring(const wchar_t *s, cl_fixnum len)
|
||||
{
|
||||
cl_object input;
|
||||
cl_object elttype;
|
||||
cl_object encoding;
|
||||
volatile cl_object ret;
|
||||
if (len < 0) {
|
||||
len = wcslen(s);
|
||||
}
|
||||
switch (sizeof(wchar_t)) {
|
||||
case 1:
|
||||
elttype = @'ext::byte8';
|
||||
encoding = @':utf-8';
|
||||
break;
|
||||
case 2:
|
||||
elttype = @'ext::byte16';
|
||||
encoding = @':ucs-2';
|
||||
break;
|
||||
case 4:
|
||||
elttype = @'ext::byte32';
|
||||
encoding = @':ucs-4';
|
||||
break;
|
||||
default:
|
||||
ecl_internal_error("Unexpected sizeof(wchar_t)");
|
||||
}
|
||||
input = si_make_vector(elttype, ecl_make_fixnum(len), ECL_NIL, ECL_NIL, ECL_NIL, ECL_NIL);
|
||||
memcpy(input->vector.self.b8, s, len*sizeof(wchar_t));
|
||||
ECL_HANDLER_CASE_BEGIN(ecl_process_env(), ecl_list1(@'ext::character-decoding-error')) {
|
||||
ret = si_octets_to_string(3, input, @':external-format', encoding);
|
||||
} ECL_HANDLER_CASE(1, c) {
|
||||
ret = c; /* suppress "unused variable `c`" warning */
|
||||
ret = OBJNULL;
|
||||
} ECL_HANDLER_CASE_END;
|
||||
return ret;
|
||||
}
|
||||
|
||||
cl_fixnum
|
||||
ecl_encode_to_unicode_wstring(wchar_t *output, cl_fixnum output_len, cl_object input)
|
||||
{
|
||||
cl_object elttype;
|
||||
cl_object encoding;
|
||||
volatile cl_fixnum ret;
|
||||
switch (sizeof(wchar_t)) {
|
||||
case 1:
|
||||
elttype = @'ext::byte8';
|
||||
encoding = @':utf-8';
|
||||
break;
|
||||
case 2:
|
||||
elttype = @'ext::byte16';
|
||||
encoding = @':ucs-2';
|
||||
break;
|
||||
case 4:
|
||||
elttype = @'ext::byte32';
|
||||
encoding = @':ucs-4';
|
||||
break;
|
||||
default:
|
||||
ecl_internal_error("Unexpected sizeof(wchar_t)");
|
||||
}
|
||||
ECL_HANDLER_CASE_BEGIN(ecl_process_env(), ecl_list1(@'ext::character-encoding-error')) {
|
||||
cl_object output_vec = si_string_to_octets(5, input, @':external-format', encoding, @':element-type', elttype);
|
||||
ret = output_vec->vector.fillp + 1;
|
||||
if (ret <= output_len) {
|
||||
memcpy(output, output_vec->vector.self.b8, (ret-1)*sizeof(wchar_t));
|
||||
output[ret-1] = 0; /* null-terminator */
|
||||
}
|
||||
} ECL_HANDLER_CASE(1, c) {
|
||||
input = c; /* suppress "unused variable `c`" warning */
|
||||
ret = -1;
|
||||
} ECL_HANDLER_CASE_END;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -2183,6 +2183,9 @@ cl_symbols[] = {
|
|||
{EXT_ "STREAM-ENCODING-ERROR" ECL_FUN(NULL, NULL, -1) ECL_VAR(EXT_ORDINARY, OBJNULL)},
|
||||
{EXT_ "DECODING-ERROR" ECL_FUN(NULL, NULL, 3) ECL_VAR(EXT_ORDINARY, OBJNULL)},
|
||||
{EXT_ "ENCODING-ERROR" ECL_FUN(NULL, NULL, 3) ECL_VAR(EXT_ORDINARY, OBJNULL)},
|
||||
{EXT_ "OCTETS-TO-STRING" ECL_FUN("si_octets_to_string", si_octets_to_string, -2) ECL_VAR(EXT_ORDINARY, OBJNULL)},
|
||||
{EXT_ "STRING-TO-OCTETS" ECL_FUN("si_string_to_octets", si_string_to_octets, -2) ECL_VAR(EXT_ORDINARY, OBJNULL)},
|
||||
{KEY_ "NULL-TERMINATE" ECL_FUN(NULL, NULL, -1) ECL_VAR(KEYWORD, OBJNULL)},
|
||||
/* #endif ECL_UNICODE */
|
||||
|
||||
{SYS_ "PROPER-LIST-P" ECL_FUN("si_proper_list_p", si_proper_list_p, 1) ECL_VAR(SI_ORDINARY, OBJNULL)},
|
||||
|
|
|
|||
12
src/configure
vendored
12
src/configure
vendored
|
|
@ -10099,6 +10099,18 @@ $as_echo "#define ECL_UNICODE 21" >>confdefs.h
|
|||
$as_echo "#define ECL_UNICODE_NAMES 1" >>confdefs.h
|
||||
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd_names_char.o unicode/ucd_names_codes.o unicode/ucd_names_pair.o unicode/ucd_names_str.o"
|
||||
for ac_header in wchar.h
|
||||
do :
|
||||
ac_fn_c_check_header_mongrel "$LINENO" "wchar.h" "ac_cv_header_wchar_h" "$ac_includes_default"
|
||||
if test "x$ac_cv_header_wchar_h" = xyes; then :
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define HAVE_WCHAR_H 1
|
||||
_ACEOF
|
||||
|
||||
fi
|
||||
|
||||
done
|
||||
|
||||
else
|
||||
CHAR_CODE_LIMIT=256
|
||||
ECL_CHARACTER="int"
|
||||
|
|
|
|||
|
|
@ -908,6 +908,7 @@ if test "x${enable_unicode}" != "xno"; then
|
|||
fi
|
||||
AC_DEFINE([ECL_UNICODE_NAMES], [1], [Link in the database of Unicode names])
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd_names_char.o unicode/ucd_names_codes.o unicode/ucd_names_pair.o unicode/ucd_names_str.o"
|
||||
AC_CHECK_HEADERS([wchar.h])
|
||||
else
|
||||
CHAR_CODE_LIMIT=256
|
||||
ECL_CHARACTER="int"
|
||||
|
|
|
|||
|
|
@ -2301,6 +2301,15 @@ Evaluates FORMs in order from left to right. If any FORM evaluates to non-
|
|||
NIL, quits and returns that (single) value. If the last FORM is reached,
|
||||
returns whatever values it returns.")
|
||||
|
||||
(docfun ext::octets-to-string function (input &key
|
||||
(external-format :default)
|
||||
(start 0)
|
||||
(end nil)) "
|
||||
Decode a sequence of octets into a string according to the given
|
||||
external format. The bounding index designators start and end optionally
|
||||
denote a subsequence to be decoded.
|
||||
")
|
||||
|
||||
(docfun output-stream-p function (stream) "
|
||||
Returns T if STREAM can handle output operations; NIL otherwise.")
|
||||
|
||||
|
|
@ -3014,6 +3023,17 @@ Similar to STRING>=, but ignores cases.")
|
|||
Returns a copy of STRING with the specified characters removed from the right
|
||||
end. CHAR-SPEC must be a sequence of characters.")
|
||||
|
||||
(docfun ext::string-to-octets function (input &key
|
||||
(external-format :default)
|
||||
(start 0)
|
||||
(end nil)
|
||||
(null-terminate nil)) "
|
||||
Encode a string into a sequence of octets according to the given
|
||||
external format. The bounding index designators start and end
|
||||
optionally denote a subsequence to be encoded. If null-terminate is
|
||||
true, add a terminating null byte.
|
||||
")
|
||||
|
||||
(docfun si::string-to-object function (string) "
|
||||
ECL specific.
|
||||
Equivalent to (READ-FROM-STRING STRING), but is much faster.")
|
||||
|
|
|
|||
|
|
@ -152,6 +152,13 @@ Return the POSIX file descriptor of @var{file-stream} as an integer
|
|||
|
||||
@subsubsection External Format Extensions
|
||||
|
||||
@lspdef ext:*default-external-format*
|
||||
@defvar ext:*default-external-format*
|
||||
Default external format to use for reading from streams, dealing with
|
||||
filenames, etc. The default is to use utf-8 encoding if ECL is built
|
||||
with Unicode support.
|
||||
@end defvar
|
||||
|
||||
@lspdef ext:all-encodings
|
||||
@defun ext:all-encodings
|
||||
Return a list of all supported external formats
|
||||
|
|
|
|||
|
|
@ -67,6 +67,56 @@ The counterpart of the previous function is @coderef{ecl_char_set}, which implem
|
|||
Both functions check the type of their arguments and verify that the indices do not exceed the string boundaries. Otherwise they signal a @code{serious-condition}.
|
||||
@end deftypefun
|
||||
|
||||
@subsubsection Converting Unicode strings
|
||||
Converting between different encodings. See @ref{Streams - External formats} for a list of supported encodings (external formats).
|
||||
|
||||
@subsubheading Functions
|
||||
@cppdef si_octets_to_string
|
||||
@lspdef ext:octets-to-string
|
||||
@defun ext:octets-to-string octets &key (external-format :default) (start 0) (end nil)
|
||||
Decode a sequence of octets (i.e. 8-bit bytes) into a string according
|
||||
to the given external format. @var{octets} must be a vector whose
|
||||
elements have a size of 8-bit. The bounding index designators
|
||||
@var{start} and @var{end} optionally denote a subsequence to be decoded.
|
||||
Signals an @coderef{ext:character-decoding-error} if the decoding fails.
|
||||
@end defun
|
||||
|
||||
@cppdef si_string_to_octets
|
||||
@lspdef ext:string-to-octets
|
||||
@defun ext:string-to-octets string &key (external-format :default) (start 0) (end nil) (null-terminate nil)
|
||||
Encode a string into a sequence of octets according to the given
|
||||
external format. The bounding index designators @var{start} and
|
||||
@var{end} optionally denote a subsequence to be encoded. If
|
||||
@var{null-terminate} is true, add a terminating null byte. Signals an
|
||||
@coderef{ext:character-encoding-error} if the encoding fails.
|
||||
@end defun
|
||||
|
||||
@cppdef ecl_decode_from_cstring
|
||||
@deftypefun cl_object ecl_decode_from_cstring (const char *string, cl_fixnum length, cl_object external_format)
|
||||
Decode a C string of the given length into a Lisp string using the
|
||||
specified external format. If @var{length} is -1, the length is
|
||||
determined by @code{strlen}. Returns @code{NULL} if the decoding fails.
|
||||
@end deftypefun
|
||||
|
||||
@cppdef ecl_encode_to_cstring
|
||||
@deftypefun cl_fixnum ecl_encode_to_cstring (char *output, cl_fixnum output_length, cl_object input, cl_object external_format)
|
||||
Encode the Lisp string @var{input} into a C string of the given length
|
||||
using the specified external format. Returns the number of characters
|
||||
necessary to encode the Lisp string (including the null terminator). If
|
||||
this is larger than @var{output_length}, @var{output} is unchanged.
|
||||
Returns -1 if the encoding fails.
|
||||
@end deftypefun
|
||||
|
||||
@cppdef ecl_decode_from_unicode_wstring
|
||||
@cppdef ecl_encode_to_unicode_wstring
|
||||
@deftypefun cl_object ecl_decode_from_unicode_wstring (const wchar_t *string, cl_fixnum length)
|
||||
@deftypefunx cl_fixnum ecl_encode_to_unicode_wstring (wchar_t *output, cl_fixnum output_length, cl_object input)
|
||||
These functions work the same as @coderef{ecl_decode_from_cstring},
|
||||
@coderef{ecl_encode_to_cstring}, except that the external format used is
|
||||
either utf-8, utf-16 or utf-32 depending on whether
|
||||
@code{sizeof(wchar_t)} is 1, 2, or 4 respectively.
|
||||
@end deftypefun
|
||||
|
||||
@subsubsection ANSI dictionary
|
||||
Common Lisp and C equivalence
|
||||
|
||||
|
|
|
|||
|
|
@ -593,6 +593,9 @@
|
|||
/* Define to 1 if you have the <vfork.h> header file. */
|
||||
#undef HAVE_VFORK_H
|
||||
|
||||
/* Define to 1 if you have the <wchar.h> header file. */
|
||||
#undef HAVE_WCHAR_H
|
||||
|
||||
/* Define to 1 if `fork' works. */
|
||||
#undef HAVE_WORKING_FORK
|
||||
|
||||
|
|
|
|||
|
|
@ -232,6 +232,9 @@ typedef unsigned char ecl_base_char;
|
|||
/* feenableexcept is available */
|
||||
#undef HAVE_FEENABLEEXCEPT
|
||||
|
||||
/* wide-strings are available */
|
||||
#undef HAVE_WCHAR_H
|
||||
|
||||
/*
|
||||
* C macros for inlining, denoting probable code paths and other stuff
|
||||
* that makes better code. Most of it is GCC specific.
|
||||
|
|
|
|||
|
|
@ -1697,6 +1697,15 @@ extern ECL_API bool ecl_member_char(ecl_character c, cl_object char_bag);
|
|||
extern ECL_API bool ecl_fits_in_base_string(cl_object s);
|
||||
extern ECL_API ecl_character ecl_char(cl_object s, cl_index i);
|
||||
extern ECL_API ecl_character ecl_char_set(cl_object s, cl_index i, ecl_character c);
|
||||
extern ECL_API cl_object si_octets_to_string _ECL_ARGS((cl_narg narg, cl_object input, ...));
|
||||
extern ECL_API cl_object si_string_to_octets _ECL_ARGS((cl_narg narg, cl_object input, ...));
|
||||
extern ECL_API cl_object ecl_decode_from_cstring(const char *s, cl_fixnum len, cl_object encoding);
|
||||
extern ECL_API cl_fixnum ecl_encode_to_cstring(char *output, cl_fixnum output_len, cl_object input, cl_object encoding);
|
||||
#ifdef HAVE_WCHAR_H
|
||||
#include <wchar.h>
|
||||
extern ECL_API cl_object ecl_decode_from_unicode_wstring(const wchar_t *s, cl_fixnum len);
|
||||
extern ECL_API cl_fixnum ecl_encode_to_unicode_wstring(wchar_t *output, cl_fixnum output_len, cl_object input);
|
||||
#endif
|
||||
|
||||
/* structure.c */
|
||||
|
||||
|
|
|
|||
|
|
@ -82,9 +82,11 @@ appeared after a '--'.")
|
|||
("--c-stack" 1 (ext:set-limit 'ext:c-stack (read-from-string 1)))
|
||||
("--trap-fpe" 0 (si::trap-fpe t t))
|
||||
("--no-trap-fpe" 0 (si::trap-fpe t nil))
|
||||
("--encoding" 1 (dolist (i (list *standard-input* *standard-output*
|
||||
*error-output* *trace-output*))
|
||||
(setf (stream-external-format i) (read-from-string 1))))
|
||||
("--encoding" 1 (let ((enc (read-from-string 1)))
|
||||
(setf ext::*default-external-format* enc)
|
||||
(dolist (i (list *standard-input* *standard-output*
|
||||
*error-output* *trace-output*))
|
||||
(setf (stream-external-format i) enc))))
|
||||
("--input-encoding" 1
|
||||
(setf (stream-external-format *standard-input*) (read-from-string 1)))
|
||||
("--output-encoding" 1
|
||||
|
|
|
|||
|
|
@ -302,3 +302,103 @@ int main(int narg, char **argv)
|
|||
return 0;
|
||||
}"))
|
||||
(test-C-program c-code))))
|
||||
|
||||
;;; Date: 2021-08-13 (Marius Gerbershagen)
|
||||
;;; Description:
|
||||
;;;
|
||||
;;; Verify that ecl_decode_from_cstring, ecl_encode_to_cstring and
|
||||
;;; wide string equivalents work correctly
|
||||
;;;
|
||||
#+unicode
|
||||
(test emb.0005.decode/encode-cstrings
|
||||
(is-true
|
||||
(let* ((c-code "
|
||||
#include <ecl/ecl.h>
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
cl_boot(argc, argv);
|
||||
|
||||
cl_object utf_8 = ecl_make_keyword(\"UTF-8\");
|
||||
|
||||
unsigned char invalid[3] = {0xff, 0xfe, 0};
|
||||
if (ecl_decode_from_cstring(invalid, -1, utf_8) != NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
unsigned char x[9] = {240, 159, 145, 137, 240, 159, 145, 136, 0};
|
||||
cl_object s = cl_make_string(1, ecl_make_fixnum(2));
|
||||
ecl_char_set(s, 0, 128073);
|
||||
ecl_char_set(s, 1, 128072);
|
||||
|
||||
if (!ecl_equal(s, ecl_decode_from_cstring(x, -1, utf_8))
|
||||
|| !ecl_equal(s, ecl_decode_from_cstring(x, 8, utf_8))) {
|
||||
return -2;
|
||||
}
|
||||
|
||||
unsigned char y[9];
|
||||
if (ecl_encode_to_cstring(y, 9, s, utf_8) != 9) {
|
||||
return -3;
|
||||
}
|
||||
for (int i = 0; i < 9; i++) {
|
||||
if (x[i] != y[i]) {
|
||||
return -4;
|
||||
}
|
||||
}
|
||||
|
||||
if (ecl_encode_to_cstring(y, 1, s, utf_8) != 9) {
|
||||
return -5;
|
||||
}
|
||||
|
||||
if (ecl_encode_to_cstring(y, 9, s, ecl_make_keyword(\"US-ASCII\")) != -1) {
|
||||
return -6;
|
||||
}
|
||||
|
||||
#ifdef HAVE_WCHAR_H
|
||||
if (sizeof(wchar_t) == 2) {
|
||||
wchar_t u[5] = {55357, 64585, 55357, 64584, 0};
|
||||
if (!ecl_equal(s, ecl_decode_from_unicode_wstring(u, -1))
|
||||
|| !ecl_equal(s, ecl_decode_from_unicode_wstring(u, 4))) {
|
||||
return -7;
|
||||
}
|
||||
|
||||
wchar_t v[5];
|
||||
if (ecl_encode_to_unicode_wstring(v, 5, s) != 5) {
|
||||
return -8;
|
||||
}
|
||||
for (int i = 0; i < 5; i++) {
|
||||
if (u[i] != v[i]) {
|
||||
return -9;
|
||||
}
|
||||
}
|
||||
|
||||
if (ecl_encode_to_unicode_wstring(v, 1, s) != 5) {
|
||||
return -10;
|
||||
}
|
||||
} else if (sizeof(wchar_t) == 4) {
|
||||
wchar_t u[3] = {128073, 128072, 0};
|
||||
if (!ecl_equal(s, ecl_decode_from_unicode_wstring(u, -1))
|
||||
|| !ecl_equal(s, ecl_decode_from_unicode_wstring(u, 2))) {
|
||||
return -7;
|
||||
}
|
||||
|
||||
wchar_t v[3];
|
||||
if (ecl_encode_to_unicode_wstring(v, 3, s) != 3) {
|
||||
return -8;
|
||||
}
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (u[i] != v[i]) {
|
||||
return -9;
|
||||
}
|
||||
}
|
||||
|
||||
if (ecl_encode_to_unicode_wstring(v, 1, s) != 3) {
|
||||
return -10;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
cl_shutdown();
|
||||
return 0;
|
||||
}
|
||||
"))
|
||||
(test-C-program c-code))))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue