mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2026-01-26 06:22:33 -08:00
add convenience methods for de-/encoding strings and wide-strings from C
This commit is contained in:
parent
548309e165
commit
a488595241
8 changed files with 259 additions and 0 deletions
106
src/c/string.d
106
src/c/string.d
|
|
@ -960,3 +960,109 @@ nstring_case(cl_narg narg, cl_object fun, ecl_casefun casefun, ecl_va_list ARGS)
|
|||
}
|
||||
@(return output);
|
||||
@)
|
||||
|
||||
cl_object
|
||||
ecl_decode_from_cstring(const char *s, cl_fixnum len, cl_object encoding)
|
||||
{
|
||||
volatile cl_object ret;
|
||||
ECL_HANDLER_CASE_BEGIN(ecl_process_env(), ecl_list1(@'ext::character-decoding-error')) {
|
||||
ret = si_octets_to_string(3, ecl_make_constant_base_string(s, len), @':external-format', encoding);
|
||||
} ECL_HANDLER_CASE(1, c) {
|
||||
ret = c; /* suppress "unused variable `c`" warning */
|
||||
ret = OBJNULL;
|
||||
} ECL_HANDLER_CASE_END;
|
||||
return ret;
|
||||
}
|
||||
|
||||
cl_fixnum
|
||||
ecl_encode_to_cstring(char *output, cl_fixnum output_len, cl_object input, cl_object encoding)
|
||||
{
|
||||
volatile cl_fixnum ret;
|
||||
ECL_HANDLER_CASE_BEGIN(ecl_process_env(), ecl_list1(@'ext::character-encoding-error')) {
|
||||
cl_object output_vec = si_string_to_octets(3, input, @':external-format', encoding);
|
||||
ret = output_vec->vector.fillp + 1;
|
||||
if (ret <= output_len) {
|
||||
memcpy(output, output_vec->vector.self.b8, (ret-1)*sizeof(char));
|
||||
output[ret-1] = 0; /* null-terminator */
|
||||
}
|
||||
} ECL_HANDLER_CASE(1, c) {
|
||||
input = c; /* suppress "unused variable `c`" warning */
|
||||
ret = -1;
|
||||
} ECL_HANDLER_CASE_END;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef HAVE_WCHAR_H
|
||||
cl_object
|
||||
ecl_decode_from_unicode_wstring(const wchar_t *s, cl_fixnum len)
|
||||
{
|
||||
cl_object input;
|
||||
cl_object elttype;
|
||||
cl_object encoding;
|
||||
volatile cl_object ret;
|
||||
if (len < 0) {
|
||||
len = wcslen(s);
|
||||
}
|
||||
switch (sizeof(wchar_t)) {
|
||||
case 1:
|
||||
elttype = @'ext::byte8';
|
||||
encoding = @':utf-8';
|
||||
break;
|
||||
case 2:
|
||||
elttype = @'ext::byte16';
|
||||
encoding = @':ucs-2';
|
||||
break;
|
||||
case 4:
|
||||
elttype = @'ext::byte32';
|
||||
encoding = @':ucs-4';
|
||||
break;
|
||||
default:
|
||||
ecl_internal_error("Unexpected sizeof(wchar_t)");
|
||||
}
|
||||
input = si_make_vector(elttype, ecl_make_fixnum(len), ECL_NIL, ECL_NIL, ECL_NIL, ECL_NIL);
|
||||
memcpy(input->vector.self.b8, s, len*sizeof(wchar_t));
|
||||
ECL_HANDLER_CASE_BEGIN(ecl_process_env(), ecl_list1(@'ext::character-decoding-error')) {
|
||||
ret = si_octets_to_string(3, input, @':external-format', encoding);
|
||||
} ECL_HANDLER_CASE(1, c) {
|
||||
ret = c; /* suppress "unused variable `c`" warning */
|
||||
ret = OBJNULL;
|
||||
} ECL_HANDLER_CASE_END;
|
||||
return ret;
|
||||
}
|
||||
|
||||
cl_fixnum
|
||||
ecl_encode_to_unicode_wstring(wchar_t *output, cl_fixnum output_len, cl_object input)
|
||||
{
|
||||
cl_object elttype;
|
||||
cl_object encoding;
|
||||
volatile cl_fixnum ret;
|
||||
switch (sizeof(wchar_t)) {
|
||||
case 1:
|
||||
elttype = @'ext::byte8';
|
||||
encoding = @':utf-8';
|
||||
break;
|
||||
case 2:
|
||||
elttype = @'ext::byte16';
|
||||
encoding = @':ucs-2';
|
||||
break;
|
||||
case 4:
|
||||
elttype = @'ext::byte32';
|
||||
encoding = @':ucs-4';
|
||||
break;
|
||||
default:
|
||||
ecl_internal_error("Unexpected sizeof(wchar_t)");
|
||||
}
|
||||
ECL_HANDLER_CASE_BEGIN(ecl_process_env(), ecl_list1(@'ext::character-encoding-error')) {
|
||||
cl_object output_vec = si_string_to_octets(5, input, @':external-format', encoding, @':element-type', elttype);
|
||||
ret = output_vec->vector.fillp + 1;
|
||||
if (ret <= output_len) {
|
||||
memcpy(output, output_vec->vector.self.b8, (ret-1)*sizeof(wchar_t));
|
||||
output[ret-1] = 0; /* null-terminator */
|
||||
}
|
||||
} ECL_HANDLER_CASE(1, c) {
|
||||
input = c; /* suppress "unused variable `c`" warning */
|
||||
ret = -1;
|
||||
} ECL_HANDLER_CASE_END;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
12
src/configure
vendored
12
src/configure
vendored
|
|
@ -10099,6 +10099,18 @@ $as_echo "#define ECL_UNICODE 21" >>confdefs.h
|
|||
$as_echo "#define ECL_UNICODE_NAMES 1" >>confdefs.h
|
||||
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd_names_char.o unicode/ucd_names_codes.o unicode/ucd_names_pair.o unicode/ucd_names_str.o"
|
||||
for ac_header in wchar.h
|
||||
do :
|
||||
ac_fn_c_check_header_mongrel "$LINENO" "wchar.h" "ac_cv_header_wchar_h" "$ac_includes_default"
|
||||
if test "x$ac_cv_header_wchar_h" = xyes; then :
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define HAVE_WCHAR_H 1
|
||||
_ACEOF
|
||||
|
||||
fi
|
||||
|
||||
done
|
||||
|
||||
else
|
||||
CHAR_CODE_LIMIT=256
|
||||
ECL_CHARACTER="int"
|
||||
|
|
|
|||
|
|
@ -908,6 +908,7 @@ if test "x${enable_unicode}" != "xno"; then
|
|||
fi
|
||||
AC_DEFINE([ECL_UNICODE_NAMES], [1], [Link in the database of Unicode names])
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd_names_char.o unicode/ucd_names_codes.o unicode/ucd_names_pair.o unicode/ucd_names_str.o"
|
||||
AC_CHECK_HEADERS([wchar.h])
|
||||
else
|
||||
CHAR_CODE_LIMIT=256
|
||||
ECL_CHARACTER="int"
|
||||
|
|
|
|||
|
|
@ -90,6 +90,33 @@ external format. The bounding index designators @var{start} and
|
|||
@var{null-terminate} is true, add a terminating null byte. Signals an
|
||||
@coderef{ext:character-encoding-error} if the encoding fails.
|
||||
@end defun
|
||||
|
||||
@cppdef ecl_decode_from_cstring
|
||||
@deftypefun cl_object ecl_decode_from_cstring (const char *string, cl_fixnum length, cl_object external_format)
|
||||
Decode a C string of the given length into a Lisp string using the
|
||||
specified external format. If @var{length} is -1, the length is
|
||||
determined by @code{strlen}. Returns @code{NULL} if the decoding fails.
|
||||
@end deftypefun
|
||||
|
||||
@cppdef ecl_encode_to_cstring
|
||||
@deftypefun cl_fixnum ecl_encode_to_cstring (char *output, cl_fixnum output_length, cl_object input, cl_object external_format)
|
||||
Encode the Lisp string @var{input} into a C string of the given length
|
||||
using the specified external format. Returns the number of characters
|
||||
necessary to encode the Lisp string (including the null terminator). If
|
||||
this is larger than @var{output_length}, @var{output} is unchanged.
|
||||
Returns -1 if the encoding fails.
|
||||
@end deftypefun
|
||||
|
||||
@cppdef ecl_decode_from_unicode_wstring
|
||||
@cppdef ecl_encode_to_unicode_wstring
|
||||
@deftypefun cl_object ecl_decode_from_unicode_wstring (const wchar_t *string, cl_fixnum length)
|
||||
@deftypefunx cl_fixnum ecl_encode_to_unicode_wstring (wchar_t *output, cl_fixnum output_length, cl_object input)
|
||||
These functions work the same as @coderef{ecl_decode_from_cstring},
|
||||
@coderef{ecl_encode_to_cstring}, except that the external format used is
|
||||
either utf-8, utf-16 or utf-32 depending on whether
|
||||
@code{sizeof(wchar_t)} is 1, 2, or 4 respectively.
|
||||
@end deftypefun
|
||||
|
||||
@subsubsection ANSI dictionary
|
||||
Common Lisp and C equivalence
|
||||
|
||||
|
|
|
|||
|
|
@ -593,6 +593,9 @@
|
|||
/* Define to 1 if you have the <vfork.h> header file. */
|
||||
#undef HAVE_VFORK_H
|
||||
|
||||
/* Define to 1 if you have the <wchar.h> header file. */
|
||||
#undef HAVE_WCHAR_H
|
||||
|
||||
/* Define to 1 if `fork' works. */
|
||||
#undef HAVE_WORKING_FORK
|
||||
|
||||
|
|
|
|||
|
|
@ -232,6 +232,9 @@ typedef unsigned char ecl_base_char;
|
|||
/* feenableexcept is available */
|
||||
#undef HAVE_FEENABLEEXCEPT
|
||||
|
||||
/* wide-strings are available */
|
||||
#undef HAVE_WCHAR_H
|
||||
|
||||
/*
|
||||
* C macros for inlining, denoting probable code paths and other stuff
|
||||
* that makes better code. Most of it is GCC specific.
|
||||
|
|
|
|||
|
|
@ -1699,6 +1699,13 @@ extern ECL_API ecl_character ecl_char(cl_object s, cl_index i);
|
|||
extern ECL_API ecl_character ecl_char_set(cl_object s, cl_index i, ecl_character c);
|
||||
extern ECL_API cl_object si_octets_to_string _ECL_ARGS((cl_narg narg, cl_object input, ...));
|
||||
extern ECL_API cl_object si_string_to_octets _ECL_ARGS((cl_narg narg, cl_object input, ...));
|
||||
extern ECL_API cl_object ecl_decode_from_cstring(const char *s, cl_fixnum len, cl_object encoding);
|
||||
extern ECL_API cl_fixnum ecl_encode_to_cstring(char *output, cl_fixnum output_len, cl_object input, cl_object encoding);
|
||||
#ifdef HAVE_WCHAR_H
|
||||
#include <wchar.h>
|
||||
extern ECL_API cl_object ecl_decode_from_unicode_wstring(const wchar_t *s, cl_fixnum len);
|
||||
extern ECL_API cl_fixnum ecl_encode_to_unicode_wstring(wchar_t *output, cl_fixnum output_len, cl_object input);
|
||||
#endif
|
||||
|
||||
/* structure.c */
|
||||
|
||||
|
|
|
|||
|
|
@ -302,3 +302,103 @@ int main(int narg, char **argv)
|
|||
return 0;
|
||||
}"))
|
||||
(test-C-program c-code))))
|
||||
|
||||
;;; Date: 2021-08-13 (Marius Gerbershagen)
|
||||
;;; Description:
|
||||
;;;
|
||||
;;; Verify that ecl_decode_from_cstring, ecl_encode_to_cstring and
|
||||
;;; wide string equivalents work correctly
|
||||
;;;
|
||||
#+unicode
|
||||
(test emb.0005.decode/encode-cstrings
|
||||
(is-true
|
||||
(let* ((c-code "
|
||||
#include <ecl/ecl.h>
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
cl_boot(argc, argv);
|
||||
|
||||
cl_object utf_8 = ecl_make_keyword(\"UTF-8\");
|
||||
|
||||
unsigned char invalid[3] = {0xff, 0xfe, 0};
|
||||
if (ecl_decode_from_cstring(invalid, -1, utf_8) != NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
unsigned char x[9] = {240, 159, 145, 137, 240, 159, 145, 136, 0};
|
||||
cl_object s = cl_make_string(1, ecl_make_fixnum(2));
|
||||
ecl_char_set(s, 0, 128073);
|
||||
ecl_char_set(s, 1, 128072);
|
||||
|
||||
if (!ecl_equal(s, ecl_decode_from_cstring(x, -1, utf_8))
|
||||
|| !ecl_equal(s, ecl_decode_from_cstring(x, 8, utf_8))) {
|
||||
return -2;
|
||||
}
|
||||
|
||||
unsigned char y[9];
|
||||
if (ecl_encode_to_cstring(y, 9, s, utf_8) != 9) {
|
||||
return -3;
|
||||
}
|
||||
for (int i = 0; i < 9; i++) {
|
||||
if (x[i] != y[i]) {
|
||||
return -4;
|
||||
}
|
||||
}
|
||||
|
||||
if (ecl_encode_to_cstring(y, 1, s, utf_8) != 9) {
|
||||
return -5;
|
||||
}
|
||||
|
||||
if (ecl_encode_to_cstring(y, 9, s, ecl_make_keyword(\"US-ASCII\")) != -1) {
|
||||
return -6;
|
||||
}
|
||||
|
||||
#ifdef HAVE_WCHAR_H
|
||||
if (sizeof(wchar_t) == 2) {
|
||||
wchar_t u[5] = {55357, 64585, 55357, 64584, 0};
|
||||
if (!ecl_equal(s, ecl_decode_from_unicode_wstring(u, -1))
|
||||
|| !ecl_equal(s, ecl_decode_from_unicode_wstring(u, 4))) {
|
||||
return -7;
|
||||
}
|
||||
|
||||
wchar_t v[5];
|
||||
if (ecl_encode_to_unicode_wstring(v, 5, s) != 5) {
|
||||
return -8;
|
||||
}
|
||||
for (int i = 0; i < 5; i++) {
|
||||
if (u[i] != v[i]) {
|
||||
return -9;
|
||||
}
|
||||
}
|
||||
|
||||
if (ecl_encode_to_unicode_wstring(v, 1, s) != 5) {
|
||||
return -10;
|
||||
}
|
||||
} else if (sizeof(wchar_t) == 4) {
|
||||
wchar_t u[3] = {128073, 128072, 0};
|
||||
if (!ecl_equal(s, ecl_decode_from_unicode_wstring(u, -1))
|
||||
|| !ecl_equal(s, ecl_decode_from_unicode_wstring(u, 2))) {
|
||||
return -7;
|
||||
}
|
||||
|
||||
wchar_t v[3];
|
||||
if (ecl_encode_to_unicode_wstring(v, 3, s) != 3) {
|
||||
return -8;
|
||||
}
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (u[i] != v[i]) {
|
||||
return -9;
|
||||
}
|
||||
}
|
||||
|
||||
if (ecl_encode_to_unicode_wstring(v, 1, s) != 3) {
|
||||
return -10;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
cl_shutdown();
|
||||
return 0;
|
||||
}
|
||||
"))
|
||||
(test-C-program c-code))))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue