From d4994b33f0ca8431dbcabcc7ce3aca9ba9239464 Mon Sep 17 00:00:00 2001 From: Marius Gerbershagen Date: Sat, 14 Feb 2026 20:36:49 +0100 Subject: [PATCH] cmp: fix incorrect coercion of base strings to extended strings The compiler was coercing base strings to extended strings when encountering them as literal objects in compiled files. According to CLHS 3.2.4.2.2, we need to preserve the actual element type (after upgrading) of arrays during compilation. For ECL, the actual array element type of a base-string is base-char and therefore we can't change this to an extended string. Actually fixing this requires some work since we use the reader to store a printed representation of the string in the compiled file. The reader string syntax always returns an extended string (There is a comment in the code which claims that this is implied by CLHS 2.4.5. I am not quite sure if that is really true but for backwards compatibility I don't want to change this). We thus introduce a new syntax #"..." for base strings which is used when reading objects from compiled files. To prevent the new syntax from leaking outside of this context, we also introduce a new readtable. --- src/c/main.d | 1 + src/c/printer/write_array.d | 4 +++ src/c/read.d | 46 ++++++++++++++++++++++++----- src/h/external.h | 1 + src/tests/normal-tests/compiler.lsp | 24 +++++++++++++++ 5 files changed, 68 insertions(+), 8 deletions(-) diff --git a/src/c/main.d b/src/c/main.d index bcfb4756f..7f06fe6a7 100644 --- a/src/c/main.d +++ b/src/c/main.d @@ -394,6 +394,7 @@ struct cl_core_struct cl_core = { .error_output = ECL_NIL, .standard_readtable = ECL_NIL, .dispatch_reader = ECL_NIL, + .compiler_readtable = ECL_NIL, .char_names = ECL_NIL, .null_string = (cl_object)&str_empty_data, diff --git a/src/c/printer/write_array.d b/src/c/printer/write_array.d index 2fb17d766..749770382 100644 --- a/src/c/printer/write_array.d +++ b/src/c/printer/write_array.d @@ -173,6 +173,10 @@ _ecl_write_base_string(cl_object x, cl_object stream) if (!ecl_print_escape() && !ecl_print_readably()) { si_do_write_sequence(x, stream, ecl_make_fixnum(0), ECL_NIL); } else { + if (ecl_symbol_value(@'*print-array*') == @'base-string') { + /* Base string syntax used for data in compiled files. */ + ecl_write_char('#', stream); + } cl_index ndx, ndx_start; ecl_write_char('"', stream); for (ndx = ndx_start = 0; ndx < x->base_string.fillp; ndx++) { diff --git a/src/c/read.d b/src/c/read.d index 3cbb45a3e..8296de713 100644 --- a/src/c/read.d +++ b/src/c/read.d @@ -531,22 +531,27 @@ read_constituent(cl_object in, bool not_first) return (read_suppress)? ECL_NIL : token; } -static cl_object -double_quote_reader(cl_object in, cl_object c) +static void +read_string_into_buffer(cl_object in, cl_object c, cl_object buffer) { int delim = ECL_CHAR_CODE(c); cl_object rtbl = ecl_current_readtable(); - cl_object token = si_get_buffer_string(); - cl_object output; for (;;) { int c = ecl_read_char_noeof(in); if (c == delim) break; else if (ecl_readtable_get(rtbl, c, NULL) == cat_single_escape) c = ecl_read_char_noeof(in); - ecl_string_push_extend(token, c); + ecl_string_push_extend(buffer, c); } +} +static cl_object +double_quote_reader(cl_object in, cl_object c) +{ + cl_object output; + cl_object token = si_get_buffer_string(); + read_string_into_buffer(in, c, token); /* Must be kept a SIMPLE-STRING, meaning a (SIMPLE-ARRAY CHARACTERS * (*)), see CLHS 2.4.5. We thus can't coerce to a BASE-STRING. */ output = cl_copy_seq(token); @@ -800,6 +805,25 @@ sharp_Y_reader(cl_object in, cl_object c, cl_object d) @(return rv); } +static cl_object +sharp_double_quote_reader(cl_object in, cl_object c, cl_object d) +{ + /* Base string reader. Used for data in compiled files. */ + cl_object s, token; + + if (d != ECL_NIL && !read_suppress) + extra_argument('"', in, d); + + token = si_get_buffer_string(); + read_string_into_buffer(in, c, token); + s = si_copy_to_simple_base_string(token); + si_put_buffer_string(token); + + if (read_suppress) + @(return ECL_NIL); + @(return s); +} + #define QUOTE 1 #define EVAL 2 #define LIST 3 @@ -2030,7 +2054,7 @@ void init_read(void) { struct ecl_readtable_entry *rtab; - cl_object r; + cl_object r, r_cmp; int i; cl_core.standard_readtable = r = ecl_alloc_object(t_readtable); @@ -2149,6 +2173,12 @@ init_read(void) * to keep it unchanged */ r->readtable.locked = 1; + r_cmp = ecl_copy_readtable(cl_core.standard_readtable, ECL_NIL); + /* This is specific to this implementation: syntax for base strings */ + cl_set_dispatch_macro_character(4, ECL_CODE_CHAR('#'), ECL_CODE_CHAR('"'), + make_cf3(sharp_double_quote_reader), r_cmp); + cl_core.compiler_readtable = r_cmp; + init_backq(); ECL_SET(@'*readtable*', @@ -2187,7 +2217,7 @@ init_read(void) @'si::*circle-counter*'); val = cl_list(25, /**pprint-dispatch-table**/ ECL_NIL, - /**print-array**/ ECL_T, + /**print-array**/ @'base-string', /* base string syntax */ /**print-base**/ ecl_make_fixnum(10), /**print-case**/ @':downcase', /**print-circle**/ ECL_T, @@ -2205,7 +2235,7 @@ init_read(void) /**read-default-float-format**/ @'single-float', /**read-eval**/ ECL_T, /**read-suppress**/ ECL_NIL, - /**readtable**/ cl_core.standard_readtable, + /**readtable**/ cl_core.compiler_readtable, /**package**/ cl_core.lisp_package, /*si::*print-package**/ cl_core.lisp_package, /*si::*print-structure**/ ECL_T, diff --git a/src/h/external.h b/src/h/external.h index ffe69e090..c4be0e7ad 100755 --- a/src/h/external.h +++ b/src/h/external.h @@ -199,6 +199,7 @@ struct cl_core_struct { cl_object error_output; cl_object standard_readtable; cl_object dispatch_reader; + cl_object compiler_readtable; cl_object char_names; cl_object null_string; diff --git a/src/tests/normal-tests/compiler.lsp b/src/tests/normal-tests/compiler.lsp index 90a003ee9..f7541469e 100644 --- a/src/tests/normal-tests/compiler.lsp +++ b/src/tests/normal-tests/compiler.lsp @@ -2655,3 +2655,27 @@ (arithmetic-error () t)) collect (list (cons base exponent) types (expt base exponent) (funcall f base exponent))))) (is (null miscompiled-cases))))) + +;;; Date 2026-02-14 +;;; Description +;;; +;;; Incorrect coercion of base strings to extended strings when +;;; encountered as literal objects in compiled files. +;;; +(test cmp.0113.literal-base-string-coercion + (let ((ofile + (with-compiler ("base-string-0113.lsp" :load t) + "(defconstant +c.0113.1+ '#.(make-array 2 :element-type 'base-char :initial-element #\\a)) + (defconstant +c.0113.2+ '#.(make-array 2 :element-type 'character :initial-element #\\b)) + (defconstant +c.0113.3+ '#.(list (make-array 2 :element-type 'base-char :initial-element #\\a))) + (defconstant +c.0113.4+ '#.(list (make-array 2 :element-type 'character :initial-element #\\b)))"))) + (delete-file "base-string-0113.lsp") + (delete-file ofile) + (is (string= +c.0113.1+ "aa")) + (is (typep +c.0113.1+ 'base-string)) + (is (string= +c.0113.2+ "bb")) + (is (and (typep +c.0113.2+ 'string) (not (typep +c.0113.2+ 'base-string)))) + (is (string= (first +c.0113.3+) "aa")) + (is (typep (first +c.0113.3+) 'base-string)) + (is (string= (first +c.0113.4+) "bb")) + (is (and (typep (first +c.0113.4+) 'string) (not (typep (first +c.0113.4+) 'base-string))))))