From 3938eb8893362063ab1fda91b7ebf5b683efab84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Kochma=C5=84ski?=
Date: Sun, 3 May 2015 11:36:20 +0200
Subject: [PATCH] unicode: Improve unicode handling for characters.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Additional sanity checks and bugfixes in case of providing extended
strings to character low-level functions.

Signed-off-by: Daniel Kochmański
---
 src/c/char_ctype.d |  3 +++
 src/c/character.d  | 33 +++++++++++++++++++++++----------
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/c/char_ctype.d b/src/c/char_ctype.d
index 57679df3f..81781f863 100644
--- a/src/c/char_ctype.d
+++ b/src/c/char_ctype.d
@@ -5,6 +5,7 @@
 /*
     Copyright (c) 1984, Taiichi Yuasa and Masami Hagiya.
     Copyright (c) 1990, Giuseppe Attardi.
+    Copyright (c) 2015, Daniel Kochmański.
 
     ECL is free software; you can redistribute it and/or
     modify it under the terms of the GNU Library General Public
@@ -86,6 +87,8 @@ ucd_char_data(ecl_character code)
 static cl_index
 ucd_value_0(ecl_character code)
 {
+        if (ecl_unlikely((code >= 0x110000)))
+                FEerror("The value ~A is not of type (MOD 1114112)", 1, code);
         return ucd_char_data(code)[0];
 }
 
diff --git a/src/c/character.d b/src/c/character.d
index 26b04b62a..a7f126702 100644
--- a/src/c/character.d
+++ b/src/c/character.d
@@ -5,6 +5,7 @@
 /*
     Copyright (c) 1984, Taiichi Yuasa and Masami Hagiya.
     Copyright (c) 1990, Giuseppe Attardi.
+    Copyright (c) 2015, Daniel Kochmański.
 
     ECL is free software; you can redistribute it and/or
     modify it under the terms of the GNU Library General Public
@@ -103,17 +104,29 @@ ecl_string_case(cl_object s)
 {
         int upcase;
         cl_index i;
-        const ecl_base_char *text = (ecl_base_char*)s->base_string.self;
-        for (i = 0, upcase = 0; i <= s->base_string.dim; i++) {
-                if (ecl_upper_case_p(text[i])) {
-                        if (upcase < 0)
-                                return 0;
-                        upcase = +1;
-                } else if (ecl_lower_case_p(text[i])) {
-                        if (upcase > 0)
-                                return 0;
-                        upcase = -1;
+        ecl_base_char *text;
+
+        switch (ecl_t_of(s)) {
+#ifdef ECL_UNICODE
+        case t_string:
+                s = si_coerce_to_base_string(s);
+#endif
+        case t_base_string:
+                text = (ecl_base_char*)s->base_string.self;
+                for (i = 0, upcase = 0; i < s->base_string.dim; i++) {
+                        if (ecl_upper_case_p(text[i])) {
+                                if (upcase < 0)
+                                        return 0;
+                                upcase = +1;
+                        } else if (ecl_lower_case_p(text[i])) {
+                                if (upcase > 0)
+                                        return 0;
+                                upcase = -1;
+                        }
                 }
+                break;
+        default:
+                FEwrong_type_argument(@[string], s);
         }
         return upcase;
 }