diff --git a/msvc/ecl-threads.def b/msvc/ecl-threads.def index ece8ec343..08dbb8195 100755 --- a/msvc/ecl-threads.def +++ b/msvc/ecl-threads.def @@ -902,8 +902,8 @@ EXPORTS read_object_non_recursive read_object - parse_number - parse_integer + ecl_parse_number + ecl_parse_integer ecl_invalid_character_p copy_readtable ecl_current_readtable @@ -1008,6 +1008,8 @@ EXPORTS ecl_string_push_extend get_string_start_end ecl_fits_in_base_string + ecl_char + ecl_char_set ; structure.c diff --git a/msvc/ecl.def b/msvc/ecl.def index 87fb928fe..24895eb63 100644 --- a/msvc/ecl.def +++ b/msvc/ecl.def @@ -908,8 +908,8 @@ EXPORTS read_object_non_recursive read_object - parse_number - parse_integer + ecl_parse_number + ecl_parse_integer ecl_invalid_character_p copy_readtable ecl_current_readtable @@ -1014,6 +1014,8 @@ EXPORTS ecl_string_push_extend get_string_start_end ecl_fits_in_base_string + ecl_char + ecl_char_set ; structure.c diff --git a/src/CHANGELOG b/src/CHANGELOG index 78647eb9a..190f9fee7 100644 --- a/src/CHANGELOG +++ b/src/CHANGELOG @@ -115,10 +115,11 @@ ECL 1.0: - The FFI will signal a type error when converting extended characters to the C types "char" or "unsigned char". - - EQUAL and EQUALP did not work with extended strings. - - Unicode support for ALPHA-CHAR-P, ALPHANUMERICP, CHAR-UPCASE, CHAR-DOWNCASE, - etc, based on the C library functions towupper, towlower. + etc, based on the ISO C99 library functions for wide characters. + + - Following functions fixed to work with Unicode strings: EQUAL, EQUALP, + PARSE-INTEGER. * Other visible changes: @@ -163,20 +164,22 @@ ECL 1.0: - C functions which disappear: si_set_compiled_function_name(), si_extended_string_concatenate(), assert_type_string(), - assert_type_character(), assert_type_symbol(), make_symbol(). + assert_type_character(), assert_type_symbol(), make_symbol(), + parse_number(), parse_integer(). - Lisp functions which disappear: si:set-compiled-function-name, si:extended-string-concatenate, si:list-nth, si:rplaca-nthcdr. - New C functions: ecl_stream_to_handle(), ecl_base_char_code(), ecl_type_error(), ecl_check_cl_type(), ecl_check_type_string(), - ecl_fixnum_in_range(), ecl_stringp(). + ecl_fixnum_in_range(), ecl_stringp(), ecl_parse_number(), + ecl_parse_integer(), ecl_char(), ecl_char_set(). - New Lisp functions: si:wrong-type-argument. - - Functions renamed: backup_fopen() -> ecl_backup_fopen() - char_code() -> ecl_char_code(), cl_log1() -> ecl_log1(), - cl_log2() -> ecl_log2(), NUMBER_TYPE() -> ECL_NUMBER_TYPE_P() + - Functions renamed: backup_fopen() -> ecl_backup_fopen() char_code() -> + ecl_char_code(), cl_log1() -> ecl_log1(), cl_log2() -> ecl_log2(), + NUMBER_TYPE() -> ECL_NUMBER_TYPE_P(). * Contributed code: diff --git a/src/c/character.d b/src/c/character.d index da17f9c0f..c5adb05aa 100644 --- a/src/c/character.d +++ b/src/c/character.d @@ -550,10 +550,9 @@ cl_name_char(cl_object name) c = cl_char(name, MAKE_FIXNUM(0)); if (c == CODE_CHAR('u') || c == CODE_CHAR('U')) { /* FIXME! This only works with base-strings */ - cl_index end = name->base_string.fillp - 1; + cl_index end = name->base_string.fillp; cl_index real_end = end; - c = parse_integer(name->base_string.self + 1, end, - &real_end, 16); + c = ecl_parse_integer(name, 1, end, &real_end, 16); if ((real_end != end) || !FIXNUMP(c)) { c = Cnil; } else { diff --git a/src/c/format.d b/src/c/format.d index 9697534bf..b9391582e 100644 --- a/src/c/format.d +++ b/src/c/format.d @@ -21,9 +21,13 @@ #if !defined(ECL_CMU_FORMAT) -#ifdef ECL_UNICODE -#error "The old version of FORMAT does not support Unicode" -#endif +/* + * This code is broken because of several reasons: + * 1) It does not support Unicode + * 2) It does not support pretty printing + * 3) It uses the old version of parse_integer() + */ +#error "The old version of FORMAT is broken" #define FMT_MAX_PARAM 8 typedef struct format_stack_struct { diff --git a/src/c/read.d b/src/c/read.d index b5df6095a..c9a1e32ea 100644 --- a/src/c/read.d +++ b/src/c/read.d @@ -34,6 +34,9 @@ #define cat(rtbl,c) ((rtbl)->readtable.table[c].syntax_type) #define read_suppress (SYM_VAL(@'*read-suppress*') != Cnil) +static struct ecl_readtable_entry* +read_table_entry(cl_object rdtbl, cl_object c); + /* FIXME! *READ-EVAL* is not taken into account */ cl_object @@ -309,7 +312,7 @@ BEGIN: base = ecl_current_read_base(); if ((base <= 10) && isalpha(token->base_string.self[0])) goto SYMBOL; - x = parse_number(token->base_string.self, token->base_string.fillp, &i, base); + x = ecl_parse_number(token, 0, token->base_string.fillp, &i, base); if (x == Cnil) FEreader_error("Syntax error when reading number.~%Offending string: ~S.", in, 1, token); @@ -365,8 +368,8 @@ read_object(cl_object in) #define basep(d) (d <= 36) /* - parse_number(s, end, ep, radix) parses C string s - up to (but not including) s[end] + ecl_parse_number(str, start, end, ep, radix) parses C string str + up to (but not including) str[end] using radix as the radix for the rational number. (For floating numbers, the radix is ignored and replaced with 10) When parsing succeeds, @@ -375,27 +378,26 @@ read_object(cl_object in) If not, OBJNULL is returned. */ cl_object -parse_number(const char *s, cl_index end, cl_index *ep, int radix) +ecl_parse_number(cl_object str, cl_index start, cl_index end, + cl_index *ep, unsigned int radix) { - cl_index i, j, exp_marker_loc = 0; + cl_index i, exp_marker_loc = 0; bool is_float = 0; - for (i=0; i < end; i++) { - char c = s[i]; + if (end <= start) { + *ep = start; + return OBJNULL; + } + for (i = start; i < end; i++) { + cl_index c = ecl_char(str, i); if (c == '/') { + /* A slash separates two integer numbers forming a ratio */ cl_object num, den; - num = parse_number(s, i, &j, radix); - if (num == OBJNULL || (j < i) || - (!FIXNUMP(num) && type_of(num) != t_bignum)) - { - *ep = j; + num = ecl_parse_integer(str, start, i, ep, radix); + if (num == OBJNULL || (*ep < i)) { return OBJNULL; } - i++; - den = parse_number(s+i, end-i, ep, radix); - *ep += i; - if (den == OBJNULL || (*ep < end) || - (!FIXNUMP(den) && type_of(den) != t_bignum)) - { + den = ecl_parse_integer(str, i+1, end, ep, radix); + if (den == OBJNULL || (*ep < end)) { return OBJNULL; } if (den == MAKE_FIXNUM(0)) { @@ -403,17 +405,20 @@ parse_number(const char *s, cl_index end, cl_index *ep, int radix) } return make_ratio(num, den); } else if (c == '.') { + /* A trailing dot denotes base-10 integer number */ radix = 10; if (i == (end-1)) { - cl_object aux = parse_integer(s, end-1, ep, radix); - (*ep)++; + cl_object aux = + ecl_parse_integer(str, 0, i, ep, radix); + if (*ep == i) { + *ep = end; + } return aux; - } else { - is_float = 1; } + is_float = 1; } else if (digitp(c, radix) < 0) { if (ecl_exponent_marker_p(c)) { - exp_marker_loc = i; + exp_marker_loc = i - start; is_float = 1; break; } @@ -424,17 +429,18 @@ parse_number(const char *s, cl_index end, cl_index *ep, int radix) } } if (!is_float) { - return parse_integer(s, end, ep, radix); + return ecl_parse_integer(str, start, end, ep, radix); + } else if (radix != 10) { + /* Can only parse floating point numbers in decimal format */ + *ep = 1; + return OBJNULL; } else { /* We use strtod() for parsing floating point numbers * accurately. However, this routine only accepts character * 'e' or 'E' as exponent markers and we have to make a copy * of the number with this exponent marker. */ -#ifdef __GNUC__ - char buffer[end+1]; -#else - char *buffer = (char*)cl_alloc_atomic(end+1); -#endif + cl_index length = end - start; + char *buffer = (char*)cl_alloc_atomic(length+1); char *parse_end; char exp_marker; cl_object output; @@ -444,11 +450,25 @@ parse_number(const char *s, cl_index end, cl_index *ep, int radix) #else double d; #endif - memcpy(buffer, s, end); - buffer[end] = '\0'; +#ifdef ECL_UNICODE + if (type_of(str) == t_string) { + for (i = start; i < end; i++) { + cl_index c = ecl_char(str, i); + if (c > 255) { + *ep = i; + return OBJNULL; + } + buffer[i] = c; + } + } else +#endif + { + memcpy(buffer, str->base_string.self + start, length); + } + buffer[length] = '\0'; if (exp_marker_loc) { + exp_marker = buffer[exp_marker_loc]; buffer[exp_marker_loc] = 'e'; - exp_marker = s[exp_marker_loc]; } else { exp_marker = ecl_current_read_default_float_format(); } @@ -457,8 +477,8 @@ parse_number(const char *s, cl_index end, cl_index *ep, int radix) #else d = strtod(buffer, &parse_end); #endif - *ep = (parse_end - buffer); - if (*ep == 0) { + *ep = (parse_end - buffer) + start; + if (*ep == start) { output = OBJNULL; goto OUTPUT; } @@ -489,44 +509,47 @@ parse_number(const char *s, cl_index end, cl_index *ep, int radix) output = OBJNULL; } OUTPUT: -#ifndef __GNUC__ - cl_dealloc(s, end+1); -#endif + cl_dealloc(buffer, length+1); return output; } } cl_object -parse_integer(const char *s, cl_index end, cl_index *ep, int radix) +ecl_parse_integer(cl_object str, cl_index start, cl_index end, + cl_index *ep, unsigned int radix) { - cl_object x; int sign, d; - cl_object integer_part; - cl_index i; + cl_object integer_part, output; + cl_index i, c; - i = 0; - sign = 1; - if (s[i] == '+') - i++; - else if (s[i] == '-') { - sign = -1; - i++; + if (start >= end || !basep(radix)) { + *ep = i; + return OBJNULL; } - if (i >= end || !basep(radix) || (d = digitp(s[i], radix)) < 0) { - *ep = i; - return(OBJNULL); + sign = 1; + c = ecl_char(str, start); + if (c == '+') { + start++; + } else if (c == '-') { + sign = -1; + start++; } integer_part = big_register0_get(); - do { + for (i = start; i < end; i++) { + c = ecl_char(str, i); + d = digitp(c, radix); + if (d < 0) { + break; + } big_mul_ui(integer_part, radix); big_add_ui(integer_part, d); - i++; - } while (i < end && (d = digitp(s[i], radix)) >= 0); - if (sign < 0) + } + if (sign < 0) { big_complement(integer_part); - x = big_register_normalize(integer_part); + } + output = big_register_normalize(integer_part); *ep = i; - return(x); + return (i == start)? OBJNULL : output; } static cl_object @@ -877,7 +900,7 @@ sharp_left_parenthesis_reader(cl_object in, cl_object c, cl_object d) if (aux == OBJNULL) break; if (i >= dim) { - FEreader_error("Vector larger than specified length, ~D.", 1, d); + FEreader_error("Vector larger than specified length, ~D.", in, 1, d); } aset1(v, i, last = aux); } @@ -1024,7 +1047,7 @@ read_number(cl_object in, int radix, cl_object macro_char) if (token == Cnil) { x = Cnil; } else { - x = parse_number(token->base_string.self, token->base_string.fillp, &i, radix); + x = ecl_parse_number(token, 0, token->base_string.fillp, &i, radix); if (x == OBJNULL || x == Cnil || i != token->base_string.fillp) { FEreader_error("Cannot parse the #~A readmacro.", in, 1, macro_char); @@ -1619,40 +1642,43 @@ do_read_delimited_list(int d, cl_object in, bool proper_list) &aux x) cl_index s, e, ep; cl_object rtbl = ecl_current_readtable(); -@ - /* FIXME! PARSE-INTEGER restricted to base-strings */ - strng = ecl_check_cl_type(@'parse-integer', strng, t_base_string); +@ { + strng = ecl_check_type_string(@'parse-integer', strng); get_string_start_end(strng, start, end, &s, &e); if (!FIXNUMP(radix) || fix(radix) < 2 || fix(radix) > 36) FEerror("~S is an illegal radix.", 1, radix); - while (rtbl->readtable.table[strng->base_string.self[s]].syntax_type - == cat_whitespace && s < e) + while (s < e && + read_table_entry(rtbl, cl_char(strng, MAKE_FIXNUM(s)))->syntax_type + == cat_whitespace) { s++; + } if (s >= e) { if (junk_allowed != Cnil) @(return Cnil MAKE_FIXNUM(s)) else goto CANNOT_PARSE; } - x = parse_integer(strng->base_string.self+s, e-s, &ep, fix(radix)); + x = ecl_parse_integer(strng, s, e, &ep, fix(radix)); if (x == OBJNULL) { - if (junk_allowed != Cnil) - @(return Cnil MAKE_FIXNUM(ep+s)) - else + if (junk_allowed != Cnil) { + @(return Cnil MAKE_FIXNUM(ep)); + } else { goto CANNOT_PARSE; + } } - if (junk_allowed != Cnil) - @(return x MAKE_FIXNUM(ep+s)) - for (s += ep ; s < e; s++) - if (rtbl->readtable.table[strng->base_string.self[s]].syntax_type - != cat_whitespace) - goto CANNOT_PARSE; - @(return x MAKE_FIXNUM(e)) - -CANNOT_PARSE: - FEparse_error("Cannot parse an integer in the string ~S.", Cnil, 1, strng); -@) + if (junk_allowed != Cnil) { + @(return x MAKE_FIXNUM(ep)); + } + for (s = ep; s < e; s++) { + if (read_table_entry(rtbl, cl_char(strng, MAKE_FIXNUM(s)))->syntax_type + != cat_whitespace) { +CANNOT_PARSE: FEparse_error("Cannot parse an integer in the string ~S.", + Cnil, 1, strng); + } + } + @(return x MAKE_FIXNUM(e)); +} @) @(defun read_byte (binary_input_stream &optional (eof_errorp Ct) eof_value) cl_object c; @@ -1736,12 +1762,25 @@ cl_readtablep(cl_object readtable) @(return ((type_of(readtable) == t_readtable)? Ct : Cnil)) } +#ifdef ECL_UNICODE +static struct ecl_readtable_entry default_readtable_entry; +#endif + static struct ecl_readtable_entry* read_table_entry(cl_object rdtbl, cl_object c) { /* INV: ecl_char_code() checks the type of `c' */ + cl_index code = ecl_char_code(c); assert_type_readtable(rdtbl); - return &(rdtbl->readtable.table[ecl_char_code(c)]); +#ifdef ECL_UNICODE + if (!BASE_CHAR_CODE_P(code)) { + default_readtable_entry.syntax_type = cat_constituent; + default_readtable_entry.macro = Cnil; + default_readtable_entry.dispatch_table = NULL; + return &default_readtable_entry; + } +#endif + return &(rdtbl->readtable.table[code]); } bool @@ -2102,4 +2141,3 @@ read_VV(cl_object block, void (*entry_point)(cl_object)) return block; } - diff --git a/src/c/string.d b/src/c/string.d index dfc51cb22..1e8c21109 100644 --- a/src/c/string.d +++ b/src/c/string.d @@ -303,19 +303,25 @@ cl_object cl_char(cl_object object, cl_object index) { cl_index position = object_to_index(index); + @(return CODE_CHAR(ecl_char(object, position))) +} + +cl_index +ecl_char(cl_object object, cl_index index) +{ /* CHAR bypasses fill pointers when accessing strings */ AGAIN: switch(type_of(object)) { #ifdef ECL_UNICODE case t_string: - if (position >= object->string.dim) + if (index >= object->string.dim) illegal_index(object, index); - @(return object->string.self[position]) + return CHAR_CODE(object->string.self[index]); #endif case t_base_string: - if (position >= object->base_string.dim) + if (index >= object->base_string.dim) illegal_index(object, index); - @(return CODE_CHAR(object->base_string.self[position])) + return object->base_string.self[index]; default: object = ecl_type_error(@'char',"",object,@'string'); goto AGAIN; @@ -326,23 +332,31 @@ cl_object si_char_set(cl_object object, cl_object index, cl_object value) { cl_index position = object_to_index(index); + cl_index c = ecl_char_code(value); + ecl_char_set(object, position, c); + @(return value) +} + +void +ecl_char_set(cl_object object, cl_index index, cl_index value) +{ AGAIN: /* CHAR bypasses fill pointers when accessing strings */ switch(type_of(object)) { #ifdef ECL_UNICODE case t_string: - if (position >= object->string.dim) + if (index >= object->string.dim) illegal_index(object, index); if (!CHARACTERP(value)) FEtype_error_character(value); - object->string.self[position] = value; - @(return object->string.self[position]) + object->string.self[index] = CODE_CHAR(value); + break; #endif case t_base_string: - if (position >= object->base_string.dim) + if (index >= object->base_string.dim) illegal_index(object, index); /* INV: ecl_char_code() checks type of value */ - object->base_string.self[position] = ecl_char_code(value); - @(return value) + object->base_string.self[index] = value; + break; default: object = ecl_type_error(@'si::char-set', "", object, @'string'); goto AGAIN; diff --git a/src/h/external.h b/src/h/external.h index 91d090752..f9e46bd48 100644 --- a/src/h/external.h +++ b/src/h/external.h @@ -1212,8 +1212,8 @@ extern cl_object cl_get_dispatch_macro_character _ARGS((cl_narg narg, cl_object extern cl_object read_object_non_recursive(cl_object in); extern cl_object read_object(cl_object in); -extern cl_object parse_number(const char *s, cl_index end, cl_index *ep, int radix); -extern cl_object parse_integer(const char *s, cl_index end, cl_index *ep, int radix); +extern cl_object ecl_parse_number(cl_object s, cl_index start, cl_index end, cl_index *ep, unsigned int radix); +extern cl_object ecl_parse_integer(cl_object s, cl_index start, cl_index end, cl_index *ep, unsigned int radix); extern bool ecl_invalid_character_p(int c); extern cl_object copy_readtable(cl_object from, cl_object to); extern cl_object ecl_current_readtable(void); @@ -1222,7 +1222,6 @@ extern char ecl_current_read_default_float_format(void); extern cl_object c_string_to_object(const char *s); extern cl_object read_VV(cl_object block, void (*entry)(cl_object)); - /* reference.c */ extern cl_object cl_fboundp(cl_object sym); @@ -1317,6 +1316,8 @@ extern bool member_char(int c, cl_object char_bag); extern int ecl_string_push_extend(cl_object s, int c); extern void get_string_start_end(cl_object s, cl_object start, cl_object end, cl_index *ps, cl_index *pe); extern bool ecl_fits_in_base_string(cl_object s); +extern cl_index ecl_char(cl_object s, cl_index i); +extern void ecl_char_set(cl_object s, cl_index i, cl_index c); /* structure.c */ diff --git a/src/h/object.h b/src/h/object.h index df72911a4..d8642e403 100644 --- a/src/h/object.h +++ b/src/h/object.h @@ -65,6 +65,7 @@ typedef cl_object (*cl_objectfn_fixed)(); #define CHARACTERP(obje) (((cl_fixnum)(obje)) & CHARACTER_TAG) #ifdef ECL_UNICODE #define BASE_CHAR_P(obje) ((((cl_fixnum)(obje)) & 0xFFFFFC03) == CHARACTER_TAG) +#define BASE_CHAR_CODE_P(x) ((x & ~((cl_fixnum)0xFF)) == 0) #define CODE_CHAR(c) ((cl_object)(((cl_fixnum)(c << 2)|CHARACTER_TAG))) #define CHAR_CODE(obje) (((cl_fixnum)(obje)) >> 2) #else