reader: separate token parsing from token reading

Parsing the token ends in a separate file.
This commit is contained in:
Daniel Kochmański 2026-03-02 22:56:33 +01:00
parent 6828cb7005
commit 29a4a2c51a
6 changed files with 154 additions and 140 deletions

View file

@ -70,8 +70,8 @@ WRITER_OBJS = print.o printer/float_to_digits.o printer/float_to_string.o
printer/write_list.o printer/write_code.o printer/write_sse.o \
printer/print_unreadable.o
READER_OBJS = readtable.o reader.o read.o \
reader/rtab_cl.o reader/parse_integer.o reader/parse_number.o
READER_OBJS = readtable.o reader.o read.o reader/rtab_cl.o \
reader/parse_token.o reader/parse_integer.o reader/parse_number.o
STREAM_OBJS = stream.o file.o streams/strm_os.o streams/strm_clos.o \
streams/strm_string.o streams/strm_composite.o streams/strm_common.o \

View file

@ -7,6 +7,7 @@
* Copyright (c) 1984 Taiichi Yuasa and Masami Hagiya
* Copyright (c) 1990 Giuseppe Attardi
* Copyright (c) 2001 Juan Jose Garcia Ripoll
* Copyright (c) 2016 Daniel Kochmański
*
* See file 'LICENSE' for the copyright details.
*
@ -27,17 +28,6 @@
#define read_suppress (ecl_symbol_value(@'*read-suppress*') != ECL_NIL)
#define loop_across_eints(l, h, obj) { \
cl_index __ecl_ndx = obj->vector.fillp; \
cl_object *__ecl_v = obj->vector.self.t; \
cl_index __ecl_idx; \
cl_fixnum l, h; \
for(__ecl_idx = 0; __ecl_idx < __ecl_ndx; __ecl_idx+=2) { \
l = ecl_fixnum(__ecl_v[__ecl_idx]); \
h = ecl_fixnum(__ecl_v[__ecl_idx+1]);
#define end_loop_across_eints() }}
static cl_object
ecl_make_token()
{
@ -151,7 +141,7 @@ ecl_dispatch_reader_fun(cl_object in, cl_object dc)
}
cl_object
ecl_read_only_token(cl_object in, bool escape_first_p)
ecl_read_token(cl_object in, bool escape_first_p)
{
int c;
cl_object token, string, escape;
@ -250,131 +240,10 @@ ecl_read_only_token(cl_object in, bool escape_first_p)
ecl_return1(the_env, token);
}
cl_object
ecl_read_token(cl_object in, int flags)
{
cl_fixnum str_i;
cl_object x, token, string, escape;
int c, base;
cl_object package, package_name, symbol_name;
cl_index length, i, sym_start, pack_end;
int colon, intern_flag;
bool external_symbol;
cl_env_ptr the_env = ecl_process_env();
bool suppress = read_suppress;
token = ecl_read_only_token(in, 0);
string = token->token.string;
escape = token->token.escape;
package = package_name = ECL_NIL;
str_i = sym_start = pack_end = colon = 0;
length = ecl_length(string);
external_symbol = 0;
if (suppress) {
x = ECL_NIL;
goto OUTPUT;
}
loop_across_eints(low_limit, high_limit, escape) {
for(; str_i<low_limit; str_i++) {
c = ecl_char(string, str_i);
if (c == ':') {
if(!Null(package))
FEreader_error("Unexpected colon character.", in, 0);
if (colon > 1) FEreader_error("Too many colons.", in, 0);
if (colon < 1) pack_end = str_i;
colon++;
sym_start = str_i+1;
continue;
} else if (colon) {
external_symbol = (colon == 1);
if (pack_end == 0) {
package = cl_core.keyword_package;
external_symbol = 0;
} else {
package_name = ecl_subseq(string, 0, pack_end);
package = ecl_find_package_nolock(package_name);
}
if (Null(package)) {
/* When loading binary files, we sometimes must create
symbols whose package has not yet been maked. We
allow it, but later on in ecl_init_module we make sure that
all referenced packages have been properly built.
*/
unlikely_if (Null(the_env->packages_to_be_created_p)) {
ecl_put_reader_token(token);
FEerror("There is no package with the name ~A.", 1, package_name);
}
package = _ecl_package_to_be_created(the_env, package_name);
}
}
}
str_i=high_limit;
} end_loop_across_eints();
/* If there are some escaped characters, it must be a symbol.
escape_intervals always has an empty interval pair at the end. */
if (package != ECL_NIL || ecl_length(escape) > 2 || length == 0)
goto SYMBOL;
/* The case in which the buffer is full of dots has to be especial cased */
if (length == 1 && TOKEN_STRING_CHAR_CMP(string, 0, '.')) {
if (flags == ECL_READ_LIST_DOT) {
x = @'si::.';
goto OUTPUT;
} else {
ecl_put_reader_token(token);
FEreader_error("Dots appeared illegally.", in, 0);
}
} else {
int i;
for (i = 0; i < length; i++) {
if (!TOKEN_STRING_CHAR_CMP(string,i,'.'))
goto MAYBE_NUMBER;
}
ecl_put_reader_token(token);
FEreader_error("Dots appeared illegally.", in, 0);
}
MAYBE_NUMBER:
/* Here we try to parse a number from the content of the buffer */
base = ecl_current_read_base();
if ((base <= 10) && ecl_alpha_char_p(TOKEN_STRING_CHAR(string, 0)))
goto SYMBOL;
x = ecl_parse_number(string, 0, TOKEN_STRING_FILLP(string), &i, base);
unlikely_if (x == ECL_NIL) {
ecl_put_reader_token(token);
FEreader_error("Syntax error when reading number.~%Offending string: ~S.",
in, 1, string);
}
if (x != OBJNULL && length == i)
goto OUTPUT;
SYMBOL:
symbol_name = ecl_subseq(string, sym_start, length);
if (external_symbol) {
x = ecl_find_symbol(symbol_name, package, &intern_flag);
unlikely_if (intern_flag != ECL_EXTERNAL) {
ecl_put_reader_token(token);
FEreader_error("Cannot find the external symbol ~A in ~S.", in,
2, symbol_name, package);
}
} else {
if (package == ECL_NIL) {
package = ecl_current_package();
}
/* INV: cl_make_symbol() copies the string */
x = ecl_intern(symbol_name, package, &intern_flag);
}
OUTPUT:
ecl_put_reader_token(token);
ecl_return1(the_env, x);
}
cl_object
ecl_read_object_with_delimiter(cl_object in, int delimiter, int flags)
{
cl_object x;
cl_object x, token;
int c;
enum ecl_chattrib a;
cl_env_ptr the_env = ecl_process_env();
@ -416,5 +285,12 @@ ecl_read_object_with_delimiter(cl_object in, int delimiter, int flags)
return o;
}
ecl_unread_char(c, in);
return ecl_read_token(in, flags);
token = ecl_read_token(in, 0);
if (suppress) {
x = ECL_NIL;
} else {
x = ecl_parse_token(token, in, flags);
}
ecl_put_reader_token(token);
return x;
}

127
src/c/reader/parse_token.d Normal file
View file

@ -0,0 +1,127 @@
/* -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- */
/* vim: set filetype=c tabstop=2 shiftwidth=2 expandtab: */
/*
*
* Copyright (c) 1984 Taiichi Yuasa and Masami Hagiya
* Copyright (c) 1990 Giuseppe Attardi
* Copyright (c) 2001 Juan Jose Garcia Ripoll
* Copyright (c) 2016 Daniel Kochmański
*
* See file 'LICENSE' for the copyright details.
*
*/
#define ECL_INCLUDE_MATH_H
#include <ecl/ecl.h>
#include <ecl/internal.h>
cl_object
ecl_parse_token(cl_object token, cl_object in, int flags)
{
cl_fixnum str_i;
cl_index length, i, sym_start, pack_end;
int colon, intern_flag, c, base;
bool external_symbol;
cl_object package, package_name, symbol_name, string, escape, x;
cl_env_ptr the_env = ecl_process_env();
string = token->token.string;
escape = token->token.escape;
package = package_name = symbol_name = x = ECL_NIL;
str_i = sym_start = pack_end = colon = 0;
length = ecl_length(string);
external_symbol = 0;
loop_across_eints(low_limit, high_limit, escape) {
for(; str_i<low_limit; str_i++) {
c = ecl_char(string, str_i);
if (c == ':') {
if(!Null(package))
FEreader_error("Unexpected colon character.", in, 0);
if (colon > 1) FEreader_error("Too many colons.", in, 0);
if (colon < 1) pack_end = str_i;
colon++;
sym_start = str_i+1;
continue;
} else if (colon) {
external_symbol = (colon == 1);
if (pack_end == 0) {
package = cl_core.keyword_package;
external_symbol = 0;
} else {
package_name = ecl_subseq(string, 0, pack_end);
package = ecl_find_package_nolock(package_name);
}
if (Null(package)) {
/* When loading binary files, we sometimes must create
symbols whose package has not yet been maked. We
allow it, but later on in ecl_init_module we make sure that
all referenced packages have been properly built.
*/
unlikely_if (Null(the_env->packages_to_be_created_p)) {
ecl_put_reader_token(token);
FEerror("There is no package with the name ~A.", 1, package_name);
}
package = _ecl_package_to_be_created(the_env, package_name);
}
}
}
str_i=high_limit;
} end_loop_across_eints();
/* If there are some escaped characters, it must be a symbol.
escape_intervals always has an empty interval pair at the end. */
if (package != ECL_NIL || ecl_length(escape) > 2 || length == 0)
goto SYMBOL;
/* The case in which the buffer is full of dots has to be especial cased */
if (length == 1 && TOKEN_STRING_CHAR_CMP(string, 0, '.')) {
if (flags == ECL_READ_LIST_DOT) {
x = @'si::.';
goto OUTPUT;
} else {
ecl_put_reader_token(token);
FEreader_error("Dots appeared illegally.", in, 0);
}
} else {
int i;
for (i = 0; i < length; i++) {
if (!TOKEN_STRING_CHAR_CMP(string,i,'.'))
goto MAYBE_NUMBER;
}
ecl_put_reader_token(token);
FEreader_error("Dots appeared illegally.", in, 0);
}
MAYBE_NUMBER:
/* Here we try to parse a number from the content of the buffer */
base = ecl_current_read_base();
if ((base <= 10) && ecl_alpha_char_p(TOKEN_STRING_CHAR(string, 0)))
goto SYMBOL;
x = ecl_parse_number(string, 0, TOKEN_STRING_FILLP(string), &i, base);
unlikely_if (x == ECL_NIL) {
ecl_put_reader_token(token);
FEreader_error("Syntax error when reading number.~%Offending string: ~S.",
in, 1, string);
}
if (x != OBJNULL && length == i)
goto OUTPUT;
SYMBOL:
symbol_name = ecl_subseq(string, sym_start, length);
if (external_symbol) {
x = ecl_find_symbol(symbol_name, package, &intern_flag);
unlikely_if (intern_flag != ECL_EXTERNAL) {
ecl_put_reader_token(token);
FEreader_error("Cannot find the external symbol ~A in ~S.", in,
2, symbol_name, package);
}
} else {
if (package == ECL_NIL) {
package = ecl_current_package();
}
/* INV: cl_make_symbol() copies the string */
x = ecl_intern(symbol_name, package, &intern_flag);
}
OUTPUT:
return x;
}

View file

@ -219,7 +219,7 @@ sharp_backslash_reader(cl_object in, cl_object c, cl_object d)
FEreader_error("~S is an illegal CHAR-FONT.", in, 1, d);
}
}
token = ecl_read_only_token(in, 1);
token = ecl_read_token(in, 1);
string = token->token.string;
escape = token->token.escape;
if (TOKEN_STRING_FILLP(string) == 1) {

View file

@ -1579,8 +1579,8 @@ extern ECL_API cl_object ecl_read_eval(cl_object in);
extern ECL_API cl_object ecl_read_object_non_recursive(cl_object in);
extern ECL_API cl_object ecl_read_object_with_delimiter(cl_object in, int del, int flags);
extern ECL_API cl_object ecl_read_object(cl_object in);
extern ECL_API cl_object ecl_read_only_token(cl_object in, bool esc);
extern ECL_API cl_object ecl_read_token(cl_object in, int flags);
extern ECL_API cl_object ecl_read_token(cl_object in, bool esc);
extern ECL_API cl_object ecl_parse_token(cl_object token, cl_object in, int flags);
extern ECL_API cl_object ecl_parse_number(cl_object s, cl_index start, cl_index end, cl_index *ep, unsigned int radix);
extern ECL_API cl_object ecl_parse_integer(cl_object s, cl_index start, cl_index end, cl_index *ep, unsigned int radix);
extern ECL_API bool ecl_invalid_character_p(int c);

View file

@ -665,6 +665,17 @@ extern cl_object mp_get_rwlock_write_wait(cl_object lock);
#define RTABSIZE ECL_CHAR_CODE_LIMIT /* read table size */
#endif
#define loop_across_eints(l, h, obj) { \
cl_index __ecl_ndx = obj->vector.fillp; \
cl_object *__ecl_v = obj->vector.self.t; \
cl_index __ecl_idx; \
cl_fixnum l, h; \
for(__ecl_idx = 0; __ecl_idx < __ecl_ndx; __ecl_idx+=2) { \
l = ecl_fixnum(__ecl_v[__ecl_idx]); \
h = ecl_fixnum(__ecl_v[__ecl_idx+1]);
#define end_loop_across_eints() }}
/* package.d */
extern cl_object _ecl_package_to_be_created(const cl_env_ptr env, cl_object name);