cmp: fix incorrect coercion of base strings to extended strings

The compiler was coercing base strings to extended strings when
encountering them as literal objects in compiled files. According to
CLHS 3.2.4.2.2, we need to preserve the actual element type (after
upgrading) of arrays during compilation. For ECL, the actual array
element type of a base-string is base-char and therefore we can't
change this to an extended string.

Actually fixing this requires some work since we use the reader to
store a printed representation of the string in the compiled file. The
reader string syntax always returns an extended string (There is a
comment in the code which claims that this is implied by CLHS 2.4.5. I
am not quite sure if that is really true but for backwards
compatibility I don't want to change this). We thus introduce a new
syntax #"..." for base strings which is used when reading objects from
compiled files. To prevent the new syntax from leaking outside of this
context, we also introduce a new readtable.
This commit is contained in:
Marius Gerbershagen 2026-02-14 20:36:49 +01:00
parent 3842c579d4
commit d4994b33f0
5 changed files with 68 additions and 8 deletions

View file

@ -394,6 +394,7 @@ struct cl_core_struct cl_core = {
.error_output = ECL_NIL,
.standard_readtable = ECL_NIL,
.dispatch_reader = ECL_NIL,
.compiler_readtable = ECL_NIL,
.char_names = ECL_NIL,
.null_string = (cl_object)&str_empty_data,

View file

@ -173,6 +173,10 @@ _ecl_write_base_string(cl_object x, cl_object stream)
if (!ecl_print_escape() && !ecl_print_readably()) {
si_do_write_sequence(x, stream, ecl_make_fixnum(0), ECL_NIL);
} else {
if (ecl_symbol_value(@'*print-array*') == @'base-string') {
/* Base string syntax used for data in compiled files. */
ecl_write_char('#', stream);
}
cl_index ndx, ndx_start;
ecl_write_char('"', stream);
for (ndx = ndx_start = 0; ndx < x->base_string.fillp; ndx++) {

View file

@ -531,22 +531,27 @@ read_constituent(cl_object in, bool not_first)
return (read_suppress)? ECL_NIL : token;
}
static cl_object
double_quote_reader(cl_object in, cl_object c)
static void
read_string_into_buffer(cl_object in, cl_object c, cl_object buffer)
{
int delim = ECL_CHAR_CODE(c);
cl_object rtbl = ecl_current_readtable();
cl_object token = si_get_buffer_string();
cl_object output;
for (;;) {
int c = ecl_read_char_noeof(in);
if (c == delim)
break;
else if (ecl_readtable_get(rtbl, c, NULL) == cat_single_escape)
c = ecl_read_char_noeof(in);
ecl_string_push_extend(token, c);
ecl_string_push_extend(buffer, c);
}
}
static cl_object
double_quote_reader(cl_object in, cl_object c)
{
cl_object output;
cl_object token = si_get_buffer_string();
read_string_into_buffer(in, c, token);
/* Must be kept a SIMPLE-STRING, meaning a (SIMPLE-ARRAY CHARACTERS
* (*)), see CLHS 2.4.5. We thus can't coerce to a BASE-STRING. */
output = cl_copy_seq(token);
@ -800,6 +805,25 @@ sharp_Y_reader(cl_object in, cl_object c, cl_object d)
@(return rv);
}
static cl_object
sharp_double_quote_reader(cl_object in, cl_object c, cl_object d)
{
/* Base string reader. Used for data in compiled files. */
cl_object s, token;
if (d != ECL_NIL && !read_suppress)
extra_argument('"', in, d);
token = si_get_buffer_string();
read_string_into_buffer(in, c, token);
s = si_copy_to_simple_base_string(token);
si_put_buffer_string(token);
if (read_suppress)
@(return ECL_NIL);
@(return s);
}
#define QUOTE 1
#define EVAL 2
#define LIST 3
@ -2030,7 +2054,7 @@ void
init_read(void)
{
struct ecl_readtable_entry *rtab;
cl_object r;
cl_object r, r_cmp;
int i;
cl_core.standard_readtable = r = ecl_alloc_object(t_readtable);
@ -2149,6 +2173,12 @@ init_read(void)
* to keep it unchanged */
r->readtable.locked = 1;
r_cmp = ecl_copy_readtable(cl_core.standard_readtable, ECL_NIL);
/* This is specific to this implementation: syntax for base strings */
cl_set_dispatch_macro_character(4, ECL_CODE_CHAR('#'), ECL_CODE_CHAR('"'),
make_cf3(sharp_double_quote_reader), r_cmp);
cl_core.compiler_readtable = r_cmp;
init_backq();
ECL_SET(@'*readtable*',
@ -2187,7 +2217,7 @@ init_read(void)
@'si::*circle-counter*');
val = cl_list(25,
/**pprint-dispatch-table**/ ECL_NIL,
/**print-array**/ ECL_T,
/**print-array**/ @'base-string', /* base string syntax */
/**print-base**/ ecl_make_fixnum(10),
/**print-case**/ @':downcase',
/**print-circle**/ ECL_T,
@ -2205,7 +2235,7 @@ init_read(void)
/**read-default-float-format**/ @'single-float',
/**read-eval**/ ECL_T,
/**read-suppress**/ ECL_NIL,
/**readtable**/ cl_core.standard_readtable,
/**readtable**/ cl_core.compiler_readtable,
/**package**/ cl_core.lisp_package,
/*si::*print-package**/ cl_core.lisp_package,
/*si::*print-structure**/ ECL_T,

View file

@ -199,6 +199,7 @@ struct cl_core_struct {
cl_object error_output;
cl_object standard_readtable;
cl_object dispatch_reader;
cl_object compiler_readtable;
cl_object char_names;
cl_object null_string;

View file

@ -2655,3 +2655,27 @@
(arithmetic-error () t))
collect (list (cons base exponent) types (expt base exponent) (funcall f base exponent)))))
(is (null miscompiled-cases)))))
;;; Date 2026-02-14
;;; Description
;;;
;;; Incorrect coercion of base strings to extended strings when
;;; encountered as literal objects in compiled files.
;;;
(test cmp.0113.literal-base-string-coercion
(let ((ofile
(with-compiler ("base-string-0113.lsp" :load t)
"(defconstant +c.0113.1+ '#.(make-array 2 :element-type 'base-char :initial-element #\\a))
(defconstant +c.0113.2+ '#.(make-array 2 :element-type 'character :initial-element #\\b))
(defconstant +c.0113.3+ '#.(list (make-array 2 :element-type 'base-char :initial-element #\\a)))
(defconstant +c.0113.4+ '#.(list (make-array 2 :element-type 'character :initial-element #\\b)))")))
(delete-file "base-string-0113.lsp")
(delete-file ofile)
(is (string= +c.0113.1+ "aa"))
(is (typep +c.0113.1+ 'base-string))
(is (string= +c.0113.2+ "bb"))
(is (and (typep +c.0113.2+ 'string) (not (typep +c.0113.2+ 'base-string))))
(is (string= (first +c.0113.3+) "aa"))
(is (typep (first +c.0113.3+) 'base-string))
(is (string= (first +c.0113.4+) "bb"))
(is (and (typep (first +c.0113.4+) 'string) (not (typep (first +c.0113.4+) 'base-string))))))