reader: wrap up the escaping and remove kludges

This commit is contained in:
Daniel Kochmański 2026-03-04 23:36:55 +01:00
parent b6ac33834f
commit f1c3edfeb3
3 changed files with 75 additions and 80 deletions

View file

@ -34,8 +34,7 @@ ecl_make_token()
cl_object o = ecl_alloc_object(t_token);
o->token.escaped = 0;
o->token.string = si_get_buffer_string();
/* To keep looping code simple, we insert an empty interval at the end. */
o->token.escape = ecl_make_stack(2);
o->token.escape = ecl_make_stack(0);
return o;
}
@ -76,36 +75,30 @@ ecl_put_reader_token(cl_object token)
{
const cl_env_ptr the_env = ecl_process_env();
cl_object pool = the_env->token_pool;
ecl_wipe_stack(token->token.escape);
TOKEN_STRING_FILLP(token->token.string) = 0;
TOKEN_ESCAPE_FILLP(token->token.escape) = 0;
token->token.escaped = 0;
the_env->token_pool = CONS(token, pool);
}
/*
* This routine inverts the case of the characters in the buffer which were not
* escaped. ESCAPE_INTERVALS is a vector of intevals of characters that were
* escaped, as in ({(low-limit . high-limit)}*).
* escaped.
*/
static void
invert_buffer_case(cl_object o, int sign)
{
cl_object string = o->token.string;
cl_object escape = o->token.escape;
cl_fixnum str_i = 0;
int c;
/* see whether we have a bug with reversed beginning/end */
loop_across_eints(low_limit, high_limit, escape) {
for(; str_i<low_limit; str_i++) {
c = TOKEN_STRING_CHAR(string, str_i);
if (ecl_upper_case_p(c) && (sign < 0)) {
c = ecl_char_downcase(c);
} else if (ecl_lower_case_p(c) && (sign > 0)) {
c = ecl_char_upcase(c);
}
TOKEN_STRING_CHAR_SET(string, str_i, c);
cl_fixnum i;
loop_across_token(index, limit, string, o) {
c = TOKEN_STRING_CHAR(string, index);
if (ecl_upper_case_p(c) && (sign < 0)) {
c = ecl_char_downcase(c);
} else if (ecl_lower_case_p(c) && (sign > 0)) {
c = ecl_char_upcase(c);
}
str_i=high_limit;
} end_loop_across_eints();
TOKEN_STRING_CHAR_SET(string, index, c);
} end_loop_across_token();
}
/*
@ -240,8 +233,7 @@ ecl_read_token(cl_object in, bool escape_first_p)
break;
a = ecl_readtable_get(rtbl, c, NULL);
}
ecl_stack_push(escape, ecl_make_fixnum(length));
ecl_stack_push(escape, ecl_make_fixnum(length));
token->token.escaped = (TOKEN_ESCAPE_FILLP(escape) > 0);
/*TOKEN_STRING_CHAR_SET(string,length,'\0');*/
/* If the readtable case was :INVERT and all non-escaped characters

View file

@ -19,65 +19,58 @@
cl_object
ecl_parse_token(cl_object token, cl_object in, int flags)
{
cl_fixnum str_i;
cl_index length, i, sym_start, pack_end;
int colon, intern_flag, c, base;
bool external_symbol;
cl_object package, package_name, symbol_name, string, escape, x;
cl_fixnum length, sym_start, pack_end;
cl_index i;
int intern_flag, base;
bool external_symbol, escaped;
cl_object package, package_name, symbol_name, string, x;
cl_env_ptr the_env = ecl_process_env();
string = token->token.string;
escape = token->token.escape;
escaped = token->token.escaped;
package = package_name = symbol_name = x = ECL_NIL;
str_i = sym_start = pack_end = colon = 0;
sym_start = pack_end = 0;
length = ecl_length(string);
external_symbol = 0;
loop_across_eints(low_limit, high_limit, escape) {
for(; str_i<low_limit; str_i++) {
c = ecl_char(string, str_i);
if (c == ':') {
if(!Null(package))
FEreader_error("Unexpected colon character.", in, 0);
pack_end = str_i;
/* Eat all ':' and advance the pointer after them. */
while(c == ':') {
colon++;
if (colon > 2)
FEreader_error("Too many colons.", in, 0);
str_i++;
if (str_i == low_limit) {
break;
}
c = ecl_char(string, str_i);
}
sym_start = str_i;
external_symbol = (colon == 1);
if (pack_end == 0) {
package = cl_core.keyword_package;
external_symbol = 0;
} else {
package_name = ecl_subseq(string, 0, pack_end);
package = ecl_find_package_nolock(package_name);
}
if (Null(package)) {
/* When loading binary files, we sometimes must create symbols whose
package has not yet been maked. We allow it, but later on in
ecl_init_module we make sure that all referenced packages have been
properly built. */
unlikely_if (Null(the_env->packages_to_be_created_p)) {
ecl_put_reader_token(token);
FEerror("There is no package with the name ~A.", 1, package_name);
}
package = _ecl_package_to_be_created(the_env, package_name);
}
/* Parse the string to find possible package separator. */
loop_across_token(index, limit, string, token) {
if (ecl_char(string, index) == ':') {
if(sym_start)
FEreader_error("Unexpected colon character.", in, 0);
pack_end = index++;
if(index < limit && ecl_char(string, index) == ':') {
sym_start=index+1;
external_symbol = 0;
} else {
sym_start=index;
external_symbol = 1;
}
}} end_loop_across_token();
/* When the separator was found, then ensure the package. */
if(sym_start) {
if (pack_end == 0) {
package = cl_core.keyword_package;
external_symbol = 0;
} else {
package_name = ecl_subseq(string, 0, pack_end);
package = ecl_find_package_nolock(package_name);
}
str_i=high_limit;
} end_loop_across_eints();
if (Null(package)) {
/* When loading binary files, we sometimes must create symbols whose
package has not yet been maked. We allow it, but later on in
ecl_init_module we make sure that all referenced packages have been
properly built. */
unlikely_if (Null(the_env->packages_to_be_created_p)) {
ecl_put_reader_token(token);
FEerror("There is no package with the name ~A.", 1, package_name);
}
package = _ecl_package_to_be_created(the_env, package_name);
}
}
/* If there are some escaped characters, it must be a symbol.
escape_intervals always has an empty interval pair at the end. */
if (package != ECL_NIL || ecl_length(escape) > 2 || length == 0)
if (package != ECL_NIL || escaped || length == 0)
goto SYMBOL;
/* The case in which the buffer is full of dots has to be especial cased. */
@ -87,7 +80,7 @@ ecl_parse_token(cl_object token, cl_object in, int flags)
goto OUTPUT;
} else {
ecl_put_reader_token(token);
FEreader_error("Dots appeared illegally.", in, 0);
FEreader_error("Dot appeared illegally.", in, 0);
}
} else {
int i;

View file

@ -576,6 +576,8 @@ extern ecl_off_t ecl_integer_to_off_t(cl_object offset);
# define TOKEN_STRING_CHAR_CMP(s,n,c) ((s)->base_string.self[n]==(c))
#endif
# define TOKEN_ESCAPE_FILLP(s) ((s)->vector.fillp)
#define ECL_READ_RETURN_IGNORABLE 3
#define ECL_READ_LIST_DOT 4
@ -665,16 +667,24 @@ extern cl_object mp_get_rwlock_write_wait(cl_object lock);
#define RTABSIZE ECL_CHAR_CODE_LIMIT /* read table size */
#endif
#define loop_across_eints(l, h, obj) { \
cl_index __ecl_ndx = obj->vector.fillp; \
cl_object *__ecl_v = obj->vector.self.t; \
#define loop_across_token(index, limit, string, object) { \
cl_object string = object->token.string; \
cl_object __ecl_lims = object->token.escape; \
cl_object *__ecl_v = __ecl_lims->vector.self.t; \
cl_index __ecl_ndx = __ecl_lims->vector.fillp; \
cl_index __ecl_idx; \
cl_fixnum l, h; \
for(__ecl_idx = 0; __ecl_idx < __ecl_ndx; __ecl_idx+=2) { \
l = ecl_fixnum(__ecl_v[__ecl_idx]); \
h = ecl_fixnum(__ecl_v[__ecl_idx+1]);
cl_fixnum index = 0; \
cl_fixnum limit, __ecl_high; \
for(__ecl_idx = 0; __ecl_idx <= __ecl_ndx; __ecl_idx+=2) { \
if (__ecl_idx == __ecl_ndx) { \
limit = __ecl_high = ecl_length(string); \
} else { \
limit = ecl_fixnum(__ecl_v[__ecl_idx]); \
__ecl_high = ecl_fixnum(__ecl_v[__ecl_idx+1]); \
} \
for(; index<limit; index++) {
#define end_loop_across_eints() }}
#define end_loop_across_token() } index=__ecl_high; }}
/* package.d */