From 2bdff342a81b0a7b1731b1a7c9ae3687ca1064a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kochma=C5=84ski?= Date: Tue, 3 Mar 2026 09:42:41 +0100 Subject: [PATCH] reader: add new operators and fix found regressions - low limit in escapes was off-by-one - quoted character immediately after :, like :|foo| did not pare correctly --- src/c/reader.d | 40 +++++++++++++++++++++++++++++++++++--- src/c/reader/parse_token.d | 38 ++++++++++++++++++++++++------------ src/c/reader/rtab_cl.d | 3 +-- src/c/symbols_list.h | 6 ++++++ src/h/external.h | 6 ++++++ 5 files changed, 76 insertions(+), 17 deletions(-) diff --git a/src/c/reader.d b/src/c/reader.d index 7102044d8..adec80275 100644 --- a/src/c/reader.d +++ b/src/c/reader.d @@ -39,6 +39,23 @@ ecl_make_token() return o; } +cl_object +si_token_string(cl_object token) +{ + cl_env_ptr the_env = ecl_process_env(); + cl_object object = token->token.string; + ecl_return1(the_env, object); +} + +cl_object +si_token_escape(cl_object token) +{ + cl_env_ptr the_env = ecl_process_env(); + cl_object object = token->token.escape; + ecl_return1(the_env, object); +} + + /* FIXME pools should be resizeable stacks. */ cl_object ecl_get_reader_token(void) @@ -172,10 +189,10 @@ ecl_read_token(cl_object in, bool escape_first_p) if (a == cat_single_escape) { c = ecl_read_char_noeof(in); a = cat_constituent; - ecl_stack_push(escape, ecl_make_fixnum(length-1)); - ecl_stack_push(escape, ecl_make_fixnum(length)); ecl_string_push_extend(string, c); length++; + ecl_stack_push(escape, ecl_make_fixnum(length-1)); + ecl_stack_push(escape, ecl_make_fixnum(length)); goto NEXT; } if (a == cat_multiple_escape) { @@ -192,7 +209,7 @@ ecl_read_token(cl_object in, bool escape_first_p) length++; } ecl_stack_push(escape, ecl_make_fixnum(begin)); - ecl_stack_push(escape, ecl_make_fixnum(length-1)); + ecl_stack_push(escape, ecl_make_fixnum(length)); goto NEXT; } if (a == cat_whitespace || a == cat_terminating) { @@ -294,3 +311,20 @@ ecl_read_object_with_delimiter(cl_object in, int delimiter, int flags) ecl_put_reader_token(token); return x; } + +cl_object +si_read_object(cl_object strm, cl_object delimiter) +{ + cl_env_ptr the_env = ecl_process_env(); + int ch = Null(delimiter) ? 0 : ecl_char_code(delimiter); + cl_object object = ecl_read_object_with_delimiter(strm, ch, 0); + ecl_return1(the_env, object); +} + +cl_object +si_read_token(cl_object strm) +{ + cl_env_ptr the_env = ecl_process_env(); + cl_object object = ecl_read_token(strm, 0); + ecl_return1(the_env, object); +} diff --git a/src/c/reader/parse_token.d b/src/c/reader/parse_token.d index 240260257..de3c6ffcc 100644 --- a/src/c/reader/parse_token.d +++ b/src/c/reader/parse_token.d @@ -38,12 +38,19 @@ ecl_parse_token(cl_object token, cl_object in, int flags) if (c == ':') { if(!Null(package)) FEreader_error("Unexpected colon character.", in, 0); - if (colon > 1) FEreader_error("Too many colons.", in, 0); - if (colon < 1) pack_end = str_i; - colon++; - sym_start = str_i+1; - continue; - } else if (colon) { + pack_end = str_i; + /* Eat all ':' and advance the pointer after them. */ + while(c == ':') { + colon++; + if (colon > 2) + FEreader_error("Too many colons.", in, 0); + str_i++; + if (str_i == low_limit) { + break; + } + c = ecl_char(string, str_i); + } + sym_start = str_i; external_symbol = (colon == 1); if (pack_end == 0) { package = cl_core.keyword_package; @@ -53,11 +60,10 @@ ecl_parse_token(cl_object token, cl_object in, int flags) package = ecl_find_package_nolock(package_name); } if (Null(package)) { - /* When loading binary files, we sometimes must create - symbols whose package has not yet been maked. We - allow it, but later on in ecl_init_module we make sure that - all referenced packages have been properly built. - */ + /* When loading binary files, we sometimes must create symbols whose + package has not yet been maked. We allow it, but later on in + ecl_init_module we make sure that all referenced packages have been + properly built. */ unlikely_if (Null(the_env->packages_to_be_created_p)) { ecl_put_reader_token(token); FEerror("There is no package with the name ~A.", 1, package_name); @@ -74,7 +80,7 @@ ecl_parse_token(cl_object token, cl_object in, int flags) if (package != ECL_NIL || ecl_length(escape) > 2 || length == 0) goto SYMBOL; - /* The case in which the buffer is full of dots has to be especial cased */ + /* The case in which the buffer is full of dots has to be especial cased. */ if (length == 1 && TOKEN_STRING_CHAR_CMP(string, 0, '.')) { if (flags == ECL_READ_LIST_DOT) { x = @'si::.'; @@ -125,3 +131,11 @@ ecl_parse_token(cl_object token, cl_object in, int flags) OUTPUT: return x; } + +cl_object +si_parse_token(cl_object token) +{ + cl_env_ptr the_env = ecl_process_env(); + cl_object object = ecl_parse_token(token, ECL_NIL, 42); + ecl_return1(the_env, object); +} diff --git a/src/c/reader/rtab_cl.d b/src/c/reader/rtab_cl.d index 32e03d087..ed0f4d2aa 100644 --- a/src/c/reader/rtab_cl.d +++ b/src/c/reader/rtab_cl.d @@ -213,7 +213,7 @@ static cl_object sharp_backslash_reader(cl_object in, cl_object c, cl_object d) { const cl_env_ptr the_env = ecl_process_env(); - cl_object token, string, escape; + cl_object token, string; if (d != ECL_NIL && !read_suppress) { unlikely_if (!ECL_FIXNUMP(d) || d != ecl_make_fixnum(0)) { FEreader_error("~S is an illegal CHAR-FONT.", in, 1, d); @@ -221,7 +221,6 @@ sharp_backslash_reader(cl_object in, cl_object c, cl_object d) } token = ecl_read_token(in, 1); string = token->token.string; - escape = token->token.escape; if (TOKEN_STRING_FILLP(string) == 1) { c = ECL_CODE_CHAR(TOKEN_STRING_CHAR(string,0)); } else if (TOKEN_STRING_FILLP(string) == 2 && TOKEN_STRING_CHAR_CMP(string,0,'^')) { diff --git a/src/c/symbols_list.h b/src/c/symbols_list.h index c23ca7160..6ad2faef8 100644 --- a/src/c/symbols_list.h +++ b/src/c/symbols_list.h @@ -1998,10 +1998,16 @@ cl_symbols[] = { {SYS_ "LONG-FLOAT-BITS" ECL_FUN(NULL, NULL, 1) ECL_VAR(SI_ORDINARY, OBJNULL)}, {SYS_ "BITS-LONG-FLOAT" ECL_FUN(NULL, NULL, 1) ECL_VAR(SI_ORDINARY, OBJNULL)}, +{SYS_ "PARSE-TOKEN" ECL_FUN("si_parse_token", si_parse_token, 1) ECL_VAR(EXT_ORDINARY, OBJNULL)}, {SYS_ "READ-OBJECT-OR-IGNORE" ECL_FUN("si_read_object_or_ignore", si_read_object_or_ignore, 2) ECL_VAR(EXT_ORDINARY, OBJNULL)}, +{SYS_ "READ-OBJECT" ECL_FUN("si_read_object", si_read_object, 2) ECL_VAR(EXT_ORDINARY, OBJNULL)}, +{SYS_ "READ-TOKEN" ECL_FUN("si_read_token", si_read_token, 1) ECL_VAR(EXT_ORDINARY, OBJNULL)}, {EXT_ "READTABLE-LOCK" ECL_FUN("si_readtable_lock", si_readtable_lock, -2) ECL_VAR(EXT_ORDINARY, OBJNULL)}, +{SYS_ "TOKEN-STRING" ECL_FUN("si_read_token", si_token_string, 1) ECL_VAR(EXT_ORDINARY, OBJNULL)}, +{SYS_ "TOKEN-ESCAPE" ECL_FUN("si_read_token", si_token_escape, 1) ECL_VAR(EXT_ORDINARY, OBJNULL)}, + {SYS_ "+IO-SYNTAX-PROGV-LIST+" ECL_FUN(NULL, NULL, -1) ECL_VAR(SI_CONSTANT, OBJNULL)}, {SYS_ "+ECL-SYNTAX-PROGV-LIST+" ECL_FUN(NULL, NULL, -1) ECL_VAR(SI_CONSTANT, OBJNULL)}, {SYS_ "WITH-ECL-IO-SYNTAX" ECL_FUN(NULL, NULL, -1) ECL_VAR(SI_ORDINARY, OBJNULL)}, diff --git a/src/h/external.h b/src/h/external.h index 1151d9823..de480d2bd 100755 --- a/src/h/external.h +++ b/src/h/external.h @@ -1592,6 +1592,12 @@ extern ECL_API char ecl_current_read_default_float_format(void); #define ecl_read_from_cstring_safe(s,v) si_string_to_object(2,ecl_make_constant_base_string(s,-1),(v)) extern ECL_API cl_object ecl_init_module(cl_object block, void (*entry)(cl_object)); +extern ECL_API cl_object si_parse_token(cl_object token); +extern ECL_API cl_object si_read_object(cl_object, cl_object); +extern ECL_API cl_object si_read_token(cl_object); +extern ECL_API cl_object si_token_string(cl_object); +extern ECL_API cl_object si_token_escape(cl_object); + /* reference.c */ extern ECL_API cl_object cl_fboundp(cl_object sym);