mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2026-03-08 22:30:23 -07:00
cmp: fix incorrect coercion of base strings to extended strings
The compiler was coercing base strings to extended strings when encountering them as literal objects in compiled files. According to CLHS 3.2.4.2.2, we need to preserve the actual element type (after upgrading) of arrays during compilation. For ECL, the actual array element type of a base-string is base-char and therefore we can't change this to an extended string. Actually fixing this requires some work since we use the reader to store a printed representation of the string in the compiled file. The reader string syntax always returns an extended string (There is a comment in the code which claims that this is implied by CLHS 2.4.5. I am not quite sure if that is really true but for backwards compatibility I don't want to change this). We thus introduce a new syntax #"..." for base strings which is used when reading objects from compiled files. To prevent the new syntax from leaking outside of this context, we also introduce a new readtable.
This commit is contained in:
parent
3842c579d4
commit
d4994b33f0
5 changed files with 68 additions and 8 deletions
|
|
@ -394,6 +394,7 @@ struct cl_core_struct cl_core = {
|
|||
.error_output = ECL_NIL,
|
||||
.standard_readtable = ECL_NIL,
|
||||
.dispatch_reader = ECL_NIL,
|
||||
.compiler_readtable = ECL_NIL,
|
||||
|
||||
.char_names = ECL_NIL,
|
||||
.null_string = (cl_object)&str_empty_data,
|
||||
|
|
|
|||
|
|
@ -173,6 +173,10 @@ _ecl_write_base_string(cl_object x, cl_object stream)
|
|||
if (!ecl_print_escape() && !ecl_print_readably()) {
|
||||
si_do_write_sequence(x, stream, ecl_make_fixnum(0), ECL_NIL);
|
||||
} else {
|
||||
if (ecl_symbol_value(@'*print-array*') == @'base-string') {
|
||||
/* Base string syntax used for data in compiled files. */
|
||||
ecl_write_char('#', stream);
|
||||
}
|
||||
cl_index ndx, ndx_start;
|
||||
ecl_write_char('"', stream);
|
||||
for (ndx = ndx_start = 0; ndx < x->base_string.fillp; ndx++) {
|
||||
|
|
|
|||
46
src/c/read.d
46
src/c/read.d
|
|
@ -531,22 +531,27 @@ read_constituent(cl_object in, bool not_first)
|
|||
return (read_suppress)? ECL_NIL : token;
|
||||
}
|
||||
|
||||
static cl_object
|
||||
double_quote_reader(cl_object in, cl_object c)
|
||||
static void
|
||||
read_string_into_buffer(cl_object in, cl_object c, cl_object buffer)
|
||||
{
|
||||
int delim = ECL_CHAR_CODE(c);
|
||||
cl_object rtbl = ecl_current_readtable();
|
||||
cl_object token = si_get_buffer_string();
|
||||
cl_object output;
|
||||
for (;;) {
|
||||
int c = ecl_read_char_noeof(in);
|
||||
if (c == delim)
|
||||
break;
|
||||
else if (ecl_readtable_get(rtbl, c, NULL) == cat_single_escape)
|
||||
c = ecl_read_char_noeof(in);
|
||||
ecl_string_push_extend(token, c);
|
||||
ecl_string_push_extend(buffer, c);
|
||||
}
|
||||
}
|
||||
|
||||
static cl_object
|
||||
double_quote_reader(cl_object in, cl_object c)
|
||||
{
|
||||
cl_object output;
|
||||
cl_object token = si_get_buffer_string();
|
||||
read_string_into_buffer(in, c, token);
|
||||
/* Must be kept a SIMPLE-STRING, meaning a (SIMPLE-ARRAY CHARACTERS
|
||||
* (*)), see CLHS 2.4.5. We thus can't coerce to a BASE-STRING. */
|
||||
output = cl_copy_seq(token);
|
||||
|
|
@ -800,6 +805,25 @@ sharp_Y_reader(cl_object in, cl_object c, cl_object d)
|
|||
@(return rv);
|
||||
}
|
||||
|
||||
static cl_object
|
||||
sharp_double_quote_reader(cl_object in, cl_object c, cl_object d)
|
||||
{
|
||||
/* Base string reader. Used for data in compiled files. */
|
||||
cl_object s, token;
|
||||
|
||||
if (d != ECL_NIL && !read_suppress)
|
||||
extra_argument('"', in, d);
|
||||
|
||||
token = si_get_buffer_string();
|
||||
read_string_into_buffer(in, c, token);
|
||||
s = si_copy_to_simple_base_string(token);
|
||||
si_put_buffer_string(token);
|
||||
|
||||
if (read_suppress)
|
||||
@(return ECL_NIL);
|
||||
@(return s);
|
||||
}
|
||||
|
||||
#define QUOTE 1
|
||||
#define EVAL 2
|
||||
#define LIST 3
|
||||
|
|
@ -2030,7 +2054,7 @@ void
|
|||
init_read(void)
|
||||
{
|
||||
struct ecl_readtable_entry *rtab;
|
||||
cl_object r;
|
||||
cl_object r, r_cmp;
|
||||
int i;
|
||||
|
||||
cl_core.standard_readtable = r = ecl_alloc_object(t_readtable);
|
||||
|
|
@ -2149,6 +2173,12 @@ init_read(void)
|
|||
* to keep it unchanged */
|
||||
r->readtable.locked = 1;
|
||||
|
||||
r_cmp = ecl_copy_readtable(cl_core.standard_readtable, ECL_NIL);
|
||||
/* This is specific to this implementation: syntax for base strings */
|
||||
cl_set_dispatch_macro_character(4, ECL_CODE_CHAR('#'), ECL_CODE_CHAR('"'),
|
||||
make_cf3(sharp_double_quote_reader), r_cmp);
|
||||
cl_core.compiler_readtable = r_cmp;
|
||||
|
||||
init_backq();
|
||||
|
||||
ECL_SET(@'*readtable*',
|
||||
|
|
@ -2187,7 +2217,7 @@ init_read(void)
|
|||
@'si::*circle-counter*');
|
||||
val = cl_list(25,
|
||||
/**pprint-dispatch-table**/ ECL_NIL,
|
||||
/**print-array**/ ECL_T,
|
||||
/**print-array**/ @'base-string', /* base string syntax */
|
||||
/**print-base**/ ecl_make_fixnum(10),
|
||||
/**print-case**/ @':downcase',
|
||||
/**print-circle**/ ECL_T,
|
||||
|
|
@ -2205,7 +2235,7 @@ init_read(void)
|
|||
/**read-default-float-format**/ @'single-float',
|
||||
/**read-eval**/ ECL_T,
|
||||
/**read-suppress**/ ECL_NIL,
|
||||
/**readtable**/ cl_core.standard_readtable,
|
||||
/**readtable**/ cl_core.compiler_readtable,
|
||||
/**package**/ cl_core.lisp_package,
|
||||
/*si::*print-package**/ cl_core.lisp_package,
|
||||
/*si::*print-structure**/ ECL_T,
|
||||
|
|
|
|||
|
|
@ -199,6 +199,7 @@ struct cl_core_struct {
|
|||
cl_object error_output;
|
||||
cl_object standard_readtable;
|
||||
cl_object dispatch_reader;
|
||||
cl_object compiler_readtable;
|
||||
|
||||
cl_object char_names;
|
||||
cl_object null_string;
|
||||
|
|
|
|||
|
|
@ -2655,3 +2655,27 @@
|
|||
(arithmetic-error () t))
|
||||
collect (list (cons base exponent) types (expt base exponent) (funcall f base exponent)))))
|
||||
(is (null miscompiled-cases)))))
|
||||
|
||||
;;; Date 2026-02-14
|
||||
;;; Description
|
||||
;;;
|
||||
;;; Incorrect coercion of base strings to extended strings when
|
||||
;;; encountered as literal objects in compiled files.
|
||||
;;;
|
||||
(test cmp.0113.literal-base-string-coercion
|
||||
(let ((ofile
|
||||
(with-compiler ("base-string-0113.lsp" :load t)
|
||||
"(defconstant +c.0113.1+ '#.(make-array 2 :element-type 'base-char :initial-element #\\a))
|
||||
(defconstant +c.0113.2+ '#.(make-array 2 :element-type 'character :initial-element #\\b))
|
||||
(defconstant +c.0113.3+ '#.(list (make-array 2 :element-type 'base-char :initial-element #\\a)))
|
||||
(defconstant +c.0113.4+ '#.(list (make-array 2 :element-type 'character :initial-element #\\b)))")))
|
||||
(delete-file "base-string-0113.lsp")
|
||||
(delete-file ofile)
|
||||
(is (string= +c.0113.1+ "aa"))
|
||||
(is (typep +c.0113.1+ 'base-string))
|
||||
(is (string= +c.0113.2+ "bb"))
|
||||
(is (and (typep +c.0113.2+ 'string) (not (typep +c.0113.2+ 'base-string))))
|
||||
(is (string= (first +c.0113.3+) "aa"))
|
||||
(is (typep (first +c.0113.3+) 'base-string))
|
||||
(is (string= (first +c.0113.4+) "bb"))
|
||||
(is (and (typep (first +c.0113.4+) 'string) (not (typep (first +c.0113.4+) 'base-string))))))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue