mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2025-12-15 15:21:03 -08:00
Added the Unicode character database to the C library
This commit is contained in:
parent
b6519e5c7a
commit
ebafa5f275
14 changed files with 81861 additions and 6 deletions
123
contrib/unicode/load-names.lisp
Normal file
123
contrib/unicode/load-names.lisp
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
(defun split-words (text &key (set '(#\Space)) (exclude t))
|
||||
(loop with start = 0
|
||||
with output = '()
|
||||
with elt-type = (array-element-type text)
|
||||
for i from 0 below (length text)
|
||||
for c across text
|
||||
when (member c set)
|
||||
do (setf output (list* (make-array (+ (- i start) (if exclude 0 1))
|
||||
:element-type elt-type
|
||||
:displaced-to text
|
||||
:displaced-index-offset start)
|
||||
output)
|
||||
start (1+ i))
|
||||
finally (return (nreverse (list* (make-array (- i start)
|
||||
:element-type elt-type
|
||||
:displaced-to text
|
||||
:displaced-index-offset start)
|
||||
output)))))
|
||||
|
||||
(defun encode-words (words hash)
|
||||
(loop for word in words
|
||||
collect (or (gethash word hash)
|
||||
(let* ((word (copy-seq word))
|
||||
(ndx (hash-table-count hash)))
|
||||
(setf (gethash word hash) (1+ ndx))))))
|
||||
|
||||
(defun fixup-hangul-syllables (dictionary)
|
||||
;; "Hangul Syllable Composition, Unicode 5.1 section 3-12"
|
||||
(let* ((sbase #xac00)
|
||||
(lbase #x1100)
|
||||
(vbase #x1161)
|
||||
(tbase #x11a7)
|
||||
(scount 11172)
|
||||
(lcount 19)
|
||||
(vcount 21)
|
||||
(tcount 28)
|
||||
(ncount (* vcount tcount))
|
||||
(table (make-hash-table)))
|
||||
(with-open-file (*standard-input*
|
||||
(make-pathname :name "Jamo" :type "txt"))
|
||||
(loop for line = (read-line nil nil)
|
||||
while line
|
||||
if (position #\; line)
|
||||
do (add-jamo-information line table)))
|
||||
(loop for sindex from 0 below scount
|
||||
for l = (+ lbase (floor sindex ncount))
|
||||
for v = (+ vbase (floor (mod sindex ncount) tcount))
|
||||
for tee = (+ tbase (mod sindex tcount))
|
||||
for name = (list* "HANGUL_" "SYLLABLE_"
|
||||
(gethash l table) (gethash v table)
|
||||
(unless (= tee tbase) (list (gethash tee table))))
|
||||
for code = (+ sbase sindex)
|
||||
collect (list* code (apply #'concatenate 'string name)
|
||||
(encode-words name dictionary)))))
|
||||
|
||||
(defun add-jamo-information (line table)
|
||||
(let* ((split (split-words line :set '(#\;) :exclude t))
|
||||
(code (parse-integer (first split) :radix 16))
|
||||
(syllable (string-trim '(#\Space)
|
||||
(subseq (second split) 0 (position #\# (second split))))))
|
||||
(setf (gethash code table) syllable)))
|
||||
|
||||
(defvar *words*)
|
||||
|
||||
(defparameter *data*
|
||||
(with-open-file (in "~/src/sbcl/tools-for-build/UnicodeData.txt" :direction :input)
|
||||
(loop with words = (setf *words* (make-hash-table :size 1024 :test #'equal))
|
||||
for ucd-line = (read-line in nil nil nil)
|
||||
while ucd-line
|
||||
nconc (let* ((ucd-data (split-words ucd-line :set '(#\;)))
|
||||
(code (first ucd-data))
|
||||
(name (second ucd-data)))
|
||||
(unless (eql (char name 0) #\<)
|
||||
(setf name (substitute #\_ #\Space name))
|
||||
(list (list* (parse-integer code :radix 16)
|
||||
name
|
||||
(encode-words (split-words
|
||||
name
|
||||
:set '(#\Space #\_ #\-)
|
||||
:exclude nil)
|
||||
words))))))))
|
||||
|
||||
(print (length *data*))
|
||||
(print (first (last *data*)))
|
||||
|
||||
;#+(or)
|
||||
(progn
|
||||
(setf *data*
|
||||
(sort (nconc (fixup-hangul-syllables *words*) *data*)
|
||||
#'<
|
||||
:key #'car))
|
||||
(print (length *data*))
|
||||
(print (first (last *data*))))
|
||||
|
||||
(defparameter *words-array*
|
||||
(loop with array = (make-array (1+ (hash-table-count *words*)))
|
||||
for k being the hash-key in *words* using (hash-value v)
|
||||
do (setf (aref array v) k)
|
||||
finally (return array)))
|
||||
|
||||
(defparameter *last-word-index* (1- (length *words-array*)))
|
||||
|
||||
(defparameter *words-array-bytes*
|
||||
(loop for c across *words-array*
|
||||
sum (1+ (length c))))
|
||||
|
||||
(defun code-to-string (code)
|
||||
(aref *words-array* code))
|
||||
|
||||
(defparameter *flattened-data*
|
||||
(loop for (code name . rest) in *data*
|
||||
nconc (append rest (list 0))))
|
||||
|
||||
(defparameter *group-names*
|
||||
(loop with output = '()
|
||||
with start = (first (first *data*))
|
||||
with last = start
|
||||
for (code name . rest) in *data*
|
||||
do (when (>= (- code last) 2)
|
||||
(setf output (cons (list start last) output)
|
||||
start code))
|
||||
(setf last code)
|
||||
finally (return (nreverse (cons (list start code) output)))))
|
||||
298
contrib/unicode/names-pairs-sort.lisp
Normal file
298
contrib/unicode/names-pairs-sort.lisp
Normal file
|
|
@ -0,0 +1,298 @@
|
|||
(defparameter *destination*
|
||||
(merge-pathnames "../../src/c/unicode/"
|
||||
(or *load-truename* *compile-pathname*)))
|
||||
|
||||
(let* ((translated-data (copy-tree *compressed-data*))
|
||||
(pairs (copy-tree *paired-data*))
|
||||
(first-code (loop for (pair-code . pair) in pairs minimize pair-code))
|
||||
(last-code (loop for (pair-code . pair) in pairs maximize pair-code)))
|
||||
;;
|
||||
;; We make sure that for each character there is a unique pair which is not
|
||||
;; used anywhere else
|
||||
;;
|
||||
(loop with used-code = (make-array (1+ last-code) :initial-element nil)
|
||||
for line in translated-data
|
||||
for pair-code = (third line)
|
||||
do (cond ((/= (length line) 3)
|
||||
(error "Error in compressed data: too long code ~A" line))
|
||||
((or (aref used-code pair-code)
|
||||
(< pair-code first-code))
|
||||
(let ((new-pair (cons pair-code 0)))
|
||||
(setf pairs (acons (incf last-code) new-pair pairs)
|
||||
(third line) last-code)))
|
||||
(t
|
||||
(setf (aref used-code pair-code) t))))
|
||||
;;
|
||||
;; We now renumber all pairs.
|
||||
;;
|
||||
(let ((translation-table (make-array (1+ last-code) :initial-element nil))
|
||||
(counter -1))
|
||||
(flet ((add-code (code)
|
||||
(or (aref translation-table code)
|
||||
(setf (aref translation-table code) (incf counter))))
|
||||
(translate (old-code)
|
||||
(or (aref translation-table old-code)
|
||||
(error "Unknown code ~A" old-code))))
|
||||
;; First of all we add the words
|
||||
(loop for i from 0 below first-code
|
||||
do (add-code i))
|
||||
;; Then we add all pairs that represent characters, so that they
|
||||
;; are consecutive, too.
|
||||
(loop for line in translated-data
|
||||
do (setf (third line) (add-code (third line))))
|
||||
;; Finally, we add the remaining pairs
|
||||
(loop for record in pairs
|
||||
do (setf (car record) (add-code (car record))))
|
||||
;; ... and we fix the definitions
|
||||
(loop for (code . pair) in pairs
|
||||
do (setf (car pair) (translate (car pair))
|
||||
(cdr pair) (translate (cdr pair))))))
|
||||
(defparameter *sorted-compressed-data* translated-data)
|
||||
(defparameter *sorted-pairs* (sort pairs #'< :key #'car))
|
||||
(print 'finished)
|
||||
)
|
||||
|
||||
(defparameter *grouped-characters*
|
||||
(loop with last-ucd-code = nil
|
||||
with start-ucd-code = nil
|
||||
with start-code = nil
|
||||
with output = '()
|
||||
with aux = '()
|
||||
for n from (third (first *sorted-compressed-data*))
|
||||
for line in *sorted-compressed-data*
|
||||
for (ucd-code name code) = line
|
||||
do (cond ((/= code n)
|
||||
(error "Codes in *sorted-compressed-data* are not consecutive:~%~A"
|
||||
(cons line (subseq aux 0 10))))
|
||||
((null start-ucd-code)
|
||||
(setf start-ucd-code ucd-code
|
||||
start-code code))
|
||||
((= last-ucd-code (1- ucd-code))
|
||||
)
|
||||
(t
|
||||
(push (list start-ucd-code last-ucd-code start-code)
|
||||
output)
|
||||
(setf start-ucd-code ucd-code
|
||||
start-code code)))
|
||||
(setf last-ucd-code ucd-code aux (cons line aux))
|
||||
finally (return (nreverse output))))
|
||||
|
||||
(with-open-file (s (merge-pathnames "ucd_names.h" *destination*)
|
||||
:direction :output
|
||||
:if-exists :supersede)
|
||||
(format s "/*
|
||||
* UNICODE NAMES DATABASE
|
||||
*/
|
||||
#ifndef ECL_UCD_NAMES_H
|
||||
#define ECL_UCD_NAMES_H 1
|
||||
|
||||
#define ECL_UCD_FIRST_PAIR ~D
|
||||
#define ECL_UCD_TOTAL_PAIRS ~D
|
||||
#define ECL_UCD_TOTAL_GROUPS ~D
|
||||
#define ECL_UCD_LARGEST_CHAR_NAME ~D
|
||||
#define ECL_UCD_TOTAL_NAMES ~D
|
||||
|
||||
typedef struct {
|
||||
unsigned char codes[4];
|
||||
} ecl_ucd_names_pair_type;
|
||||
|
||||
typedef struct {
|
||||
int smallest, largest, pair_code;
|
||||
} ecl_ucd_names_char_group;
|
||||
|
||||
typedef struct {
|
||||
unsigned char pair[2];
|
||||
unsigned char code[3];
|
||||
} ecl_ucd_code_and_pair;
|
||||
|
||||
extern const ecl_ucd_names_pair_type ecl_ucd_names_pair[ECL_UCD_TOTAL_PAIRS];
|
||||
extern const ecl_ucd_names_char_group ecl_ucd_names_char[ECL_UCD_TOTAL_GROUPS];
|
||||
extern const char *ecl_ucd_names_word[ECL_UCD_FIRST_PAIR];
|
||||
extern const ecl_ucd_code_and_pair ecl_ucd_sorted_pairs[ECL_UCD_TOTAL_NAMES];
|
||||
|
||||
#endif
|
||||
"
|
||||
(1+ *last-word-index*)
|
||||
(length *sorted-pairs*)
|
||||
(length *grouped-characters*)
|
||||
(loop for (code name . rest) in *compressed-data*
|
||||
maximize (length name))
|
||||
(length *compressed-data*)
|
||||
))
|
||||
|
||||
(with-open-file (s (merge-pathnames "ucd_names_pair.c" *destination*)
|
||||
:direction :output
|
||||
:if-exists :supersede)
|
||||
(format s "/*
|
||||
* Pairs of symbols.
|
||||
*/
|
||||
|
||||
#include <ecl/ecl.h>
|
||||
#include \"ucd_names.h\"
|
||||
|
||||
const ecl_ucd_names_pair_type ecl_ucd_names_pair[ECL_UCD_TOTAL_PAIRS] = {
|
||||
"
|
||||
(length *sorted-pairs*) (length *sorted-pairs*))
|
||||
(loop for i from 0
|
||||
for (pair-code . (a . b)) in *sorted-pairs*
|
||||
do (format s "~A{~D, ~D, ~D, ~D}~%"
|
||||
(if (plusp i) "," "")
|
||||
(logand a #xff) (ash a -8)
|
||||
(logand b #xff) (ash b -8)
|
||||
))
|
||||
(format s "};~%"))
|
||||
|
||||
(with-open-file (s (merge-pathnames "ucd_names_codes.c" *destination*)
|
||||
:direction :output
|
||||
:if-exists :supersede)
|
||||
(format s "/*
|
||||
* Sorted character names.
|
||||
*/
|
||||
|
||||
#include <ecl/ecl.h>
|
||||
#include \"ucd_names.h\"
|
||||
|
||||
const ecl_ucd_code_and_pair ecl_ucd_sorted_pairs[ECL_UCD_TOTAL_NAMES] = {
|
||||
")
|
||||
(loop with l = (sort (copy-tree *sorted-compressed-data*) #'string<= :key #'second)
|
||||
for (ucd-code name code) in l
|
||||
for i from 0
|
||||
do (format s "~A{{~D, ~D}, {~D, ~D, ~D}}~%"
|
||||
(if (plusp i) "," "")
|
||||
(logand code #xff) (ash code -8)
|
||||
(logand ucd-code #xff) (logand (ash ucd-code -8) #xff)
|
||||
(logand (ash ucd-code -16) #xff)))
|
||||
(format s "};"))
|
||||
|
||||
(with-open-file (s (merge-pathnames "ucd_names_str.c" *destination*)
|
||||
:direction :output
|
||||
:if-exists :supersede)
|
||||
(format s "/*
|
||||
* Dictionary words.
|
||||
*/
|
||||
|
||||
#include <ecl/ecl.h>
|
||||
#include \"ucd_names.h\"
|
||||
|
||||
const char *ecl_ucd_names_word[ECL_UCD_FIRST_PAIR] = {
|
||||
")
|
||||
(loop for i from 0
|
||||
for c across *words-array*
|
||||
do (format s "~A~S~%" (if (plusp i) "," "") (or c "")))
|
||||
(format s "};~%"))
|
||||
|
||||
(with-open-file (s (merge-pathnames "ucd_names_char.c" *destination*)
|
||||
:direction :output
|
||||
:if-exists :supersede)
|
||||
(format s "/*
|
||||
* Dictionary words.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <ecl/ecl.h>
|
||||
#include \"ucd_names.h\"
|
||||
|
||||
const ecl_ucd_names_char_group ecl_ucd_names_char[ECL_UCD_TOTAL_GROUPS] = {
|
||||
"
|
||||
(length *grouped-characters*))
|
||||
(loop for i from 0
|
||||
for (start end pair-code) in *grouped-characters*
|
||||
do (format s "~A{~D,~D,~D}~%" (if (plusp i) "," "")
|
||||
start end pair-code))
|
||||
(format s "};
|
||||
|
||||
static int
|
||||
search_pair(ecl_character c)
|
||||
{
|
||||
int mid, low = 0, up = ECL_UCD_TOTAL_GROUPS-1;
|
||||
do {
|
||||
mid = (up + low) / 2;
|
||||
if (c < ecl_ucd_names_char[mid].smallest)
|
||||
up = mid-1;
|
||||
else if (c > ecl_ucd_names_char[mid].largest)
|
||||
low = mid+1;
|
||||
else
|
||||
return (c - ecl_ucd_names_char[mid].smallest) +
|
||||
ecl_ucd_names_char[mid].pair_code;
|
||||
} while (low <= up && (low >= 0) && (up < ECL_UCD_TOTAL_GROUPS));
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void
|
||||
fill_pair_name(char *buffer, int pair)
|
||||
{
|
||||
if (pair < ECL_UCD_FIRST_PAIR) {
|
||||
strncat(buffer, ecl_ucd_names_word[pair], ECL_UCD_LARGEST_CHAR_NAME+1);
|
||||
/*
|
||||
printf(\"text=%s\\n\", ecl_ucd_names_word[pair]);
|
||||
*/
|
||||
} else {
|
||||
const ecl_ucd_names_pair_type p = ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR];
|
||||
/*
|
||||
printf(\"ndx=%d\\n\", pair - ECL_UCD_FIRST_PAIR);
|
||||
printf(\"c0=%d\\n\", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[0]);
|
||||
printf(\"c1=%d\\n\", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[1]);
|
||||
printf(\"c2=%d\\n\", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[2]);
|
||||
printf(\"c3=%d\\n\", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[3]);
|
||||
*/
|
||||
fill_pair_name(buffer, (((unsigned int)p.codes[1]) << 8) | p.codes[0]);
|
||||
fill_pair_name(buffer, (((unsigned int)p.codes[3]) << 8) | p.codes[2]);
|
||||
}
|
||||
}
|
||||
|
||||
cl_object
|
||||
_ecl_ucd_code_to_name(ecl_character c)
|
||||
{
|
||||
int pair = search_pair(c);
|
||||
if (pair < 0)
|
||||
return ECL_NIL;
|
||||
else {
|
||||
char buffer[ECL_UCD_LARGEST_CHAR_NAME+1];
|
||||
buffer[0] = 0;
|
||||
fill_pair_name(buffer, pair);
|
||||
return make_base_string_copy(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
cl_object
|
||||
_ecl_ucd_name_to_code(cl_object name)
|
||||
{
|
||||
int mid, low = 0, up = ECL_UCD_TOTAL_NAMES-1;
|
||||
int l = ecl_length(name);
|
||||
if (l <= ECL_UCD_LARGEST_CHAR_NAME) {
|
||||
char buffer1[ECL_UCD_LARGEST_CHAR_NAME+1];
|
||||
char buffer2[ECL_UCD_LARGEST_CHAR_NAME+1];
|
||||
for (mid = 0; mid < l; mid++) {
|
||||
ecl_character c = ecl_char_upcase(ecl_char(name, mid));
|
||||
buffer1[mid] = c;
|
||||
if (c < 32 || c > 127) /* All character names are [-A-Z_0-9]* */
|
||||
return ECL_NIL;
|
||||
}
|
||||
buffer1[mid] = 0;
|
||||
do {
|
||||
ecl_ucd_code_and_pair p = ecl_ucd_sorted_pairs[mid = (low + up) / 2];
|
||||
int flag, pair = ((unsigned int)p.pair[1] << 8) | p.pair[0];
|
||||
buffer2[0] = 0;
|
||||
fill_pair_name(buffer2, pair);
|
||||
flag = strcmp(buffer1, buffer2);
|
||||
/*
|
||||
printf(\"[%d,%d,%d] %s <> (%d)%s -> %d\\n\",
|
||||
low, mid, up, buffer1, pair, buffer2, flag);
|
||||
*/
|
||||
if (flag == 0) {
|
||||
return ecl_make_fixnum(((unsigned int)p.code[2] << 16) |
|
||||
((unsigned int)p.code[1] << 8) |
|
||||
p.code[0]);
|
||||
} else if (flag < 0) {
|
||||
up = mid - 1;
|
||||
} else {
|
||||
low = mid + 1;
|
||||
}
|
||||
} while (low <= up);
|
||||
}
|
||||
return ECL_NIL;
|
||||
}
|
||||
|
||||
"))
|
||||
|
||||
;(ext:run-program "/bin/sh" '("-c" "cp *.c *.h ~/devel/ecl/src/c/unicode/"))
|
||||
138
contrib/unicode/names-pairs.lisp
Normal file
138
contrib/unicode/names-pairs.lisp
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
(load "./load-names.lisp")
|
||||
|
||||
(declaim (optimize (debug 0) (speed 3)))
|
||||
|
||||
(setf *print-circle* t)
|
||||
|
||||
(defun compute-pairs (data table)
|
||||
(clrhash table)
|
||||
(loop with max = 0
|
||||
with max-pair = nil
|
||||
for (code name . l) in data
|
||||
do (loop for l2 on l
|
||||
for a = (car l2)
|
||||
for b = (cadr l2)
|
||||
while b
|
||||
do (let* ((pair (cons a b))
|
||||
(c (gethash pair table)))
|
||||
(setf (gethash pair table)
|
||||
(setf c (if c (1+ c) 1))
|
||||
a b)
|
||||
(when (> c max)
|
||||
(setf max c max-pair pair))))
|
||||
finally (return (cons max max-pair))))
|
||||
|
||||
(defun replace-pair (pair code data)
|
||||
(let ((old-a (car pair))
|
||||
(old-b (cdr pair)))
|
||||
(loop with more = 0
|
||||
for (ucd-code name . l) in data
|
||||
do (loop with l2 = l
|
||||
for a = (first l2)
|
||||
for b = (second l2)
|
||||
while b
|
||||
do (when (and (eql a old-a) (eql b old-b))
|
||||
;; replace (a b . c) with (pair . c)
|
||||
(setf (car l2) code
|
||||
(cdr l2) (cddr l2)))
|
||||
do (setf l2 (cdr l2)))
|
||||
do (setf more (+ more (1- (length l))))
|
||||
finally (return more))))
|
||||
|
||||
(defun compress (data)
|
||||
(loop with last-length = 0
|
||||
with table = (make-hash-table :size 2048 :test #'equal)
|
||||
with pairs = '()
|
||||
for new-symbol from (1+ *last-word-index*)
|
||||
for (frequency . pair) = (compute-pairs data table)
|
||||
while (and pair (> frequency 1))
|
||||
do
|
||||
(format t "~%;;; ~A, ~D -> ~D, ~D left" pair frequency new-symbol
|
||||
(replace-pair pair new-symbol data))
|
||||
(setf pairs (acons new-symbol pair pairs))
|
||||
finally
|
||||
;; There are no redundant pairs. We just define ad-hoc new
|
||||
;; symbols for all remaining strings.
|
||||
(loop with n = new-symbol
|
||||
for (code name . l) in data
|
||||
do (loop with l2 = l
|
||||
for a = (first l2)
|
||||
for b = (second l2)
|
||||
while b
|
||||
do (setf pairs (acons n (cons a b) pairs)
|
||||
(car l2) n
|
||||
(cdr l2) (cddr l2)
|
||||
n (1+ n))))
|
||||
(print 'finished)
|
||||
(return-from compress (nreverse pairs))))
|
||||
|
||||
(progn
|
||||
(defparameter *compressed-data* (copy-tree *data*))
|
||||
(defparameter *paired-data* (compress *compressed-data*)))
|
||||
|
||||
(defparameter *last-code* (first (first (last *paired-data*))))
|
||||
|
||||
(defparameter *code-ndx-size* (ceiling (integer-length *last-code*) 8))
|
||||
|
||||
(defparameter *pair-table-size* (* (length *paired-data*)
|
||||
(* 2 *code-ndx-size*)))
|
||||
|
||||
(defparameter *code-to-name-bytes*
|
||||
(* (length *compressed-data*)
|
||||
(+ 3 ; Size of Unicode code
|
||||
;; Size of index into the data table
|
||||
*code-ndx-size*)))
|
||||
|
||||
(defparameter *sorted-names-bytes*
|
||||
;; The sorted list of character names is just a list of indices into
|
||||
;; the *code-to-name-bytes* table
|
||||
(* (length *compressed-data*) *code-ndx-size*))
|
||||
|
||||
(defparameter *word-dictionary*
|
||||
(+ *words-array-bytes*))
|
||||
|
||||
(format t "
|
||||
;;; Codes dictionary = ~D bytes
|
||||
;;; Pair table size = ~D bytes
|
||||
;;; Code to names table = ~D bytes
|
||||
;;; Names to codes table = ~D bytes
|
||||
;;; Total = ~D bytes
|
||||
"
|
||||
*word-dictionary*
|
||||
*pair-table-size*
|
||||
*code-to-name-bytes*
|
||||
*sorted-names-bytes*
|
||||
(+
|
||||
*word-dictionary*
|
||||
*pair-table-size*
|
||||
*code-to-name-bytes*
|
||||
*sorted-names-bytes*
|
||||
))
|
||||
|
||||
;;; WITH HANGUL
|
||||
;;; Codes dictionary = 78566 bytes
|
||||
;;; Pair table size = 198752 bytes
|
||||
;;; Code to names table = 164570 bytes
|
||||
;;; Names to codes table = 65828 bytes
|
||||
;;; Total = 507716 bytes
|
||||
|
||||
;;; WITHOUT HANGUL
|
||||
;;; Codes dictionary = 78555 bytes
|
||||
;;; Pair table size = 150868 bytes
|
||||
;;; Code to names table = 108710 bytes
|
||||
;;; Names to codes table = 43484 bytes
|
||||
;;; Total = 381617 bytes
|
||||
|
||||
;;; Without HANGUL (split by space and -)
|
||||
;;; Codes dictionary = 58258 bytes
|
||||
;;; Pair table size = 160576 bytes
|
||||
;;; Code to names table = 108710 bytes
|
||||
;;; Names to codes table = 43484 bytes
|
||||
;;; Total = 371028 bytes
|
||||
|
||||
;;; With HANGUL (split by space and -)
|
||||
;;; Codes dictionary = 58269 bytes
|
||||
;;; Pair table size = 208460 bytes
|
||||
;;; Code to names table = 164570 bytes
|
||||
;;; Names to codes table = 65828 bytes
|
||||
;;; Total = 497127 bytes
|
||||
|
|
@ -465,7 +465,15 @@ cl_char_name(cl_object c)
|
|||
{
|
||||
ecl_character code = ecl_char_code(c);
|
||||
cl_object output;
|
||||
if (code > 127) {
|
||||
if (code <= 127) {
|
||||
output = ecl_gethash_safe(ecl_make_fixnum(code), cl_core.char_names, ECL_NIL);
|
||||
}
|
||||
#ifdef ECL_UNICODE_NAMES
|
||||
else if (!Null(output = _ecl_ucd_code_to_name(code))) {
|
||||
(void)0;
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
ecl_base_char name[8];
|
||||
ecl_base_char *start;
|
||||
name[7] = 0;
|
||||
|
|
@ -482,8 +490,6 @@ cl_char_name(cl_object c)
|
|||
}
|
||||
start[0] = 'U';
|
||||
output = make_base_string_copy((const char*)start);
|
||||
} else {
|
||||
output = ecl_gethash_safe(ecl_make_fixnum(code), cl_core.char_names, ECL_NIL);
|
||||
}
|
||||
@(return output);
|
||||
}
|
||||
|
|
@ -491,13 +497,21 @@ cl_char_name(cl_object c)
|
|||
cl_object
|
||||
cl_name_char(cl_object name)
|
||||
{
|
||||
const cl_env_ptr the_env = ecl_process_env();
|
||||
cl_object c;
|
||||
cl_index l;
|
||||
name = cl_string(name);
|
||||
c = ecl_gethash_safe(name, cl_core.char_names, ECL_NIL);
|
||||
if (c != ECL_NIL) {
|
||||
c = ECL_CODE_CHAR(ecl_fixnum(c));
|
||||
} else if (ecl_stringp(name) && (l = ecl_length(name))) {
|
||||
ecl_return1(the_env, ECL_CODE_CHAR(ecl_fixnum(c)));
|
||||
}
|
||||
#ifdef ECL_UNICODE_NAMES
|
||||
c = _ecl_ucd_name_to_code(name);
|
||||
if (c != ECL_NIL) {
|
||||
ecl_return1(the_env, cl_code_char(c));
|
||||
}
|
||||
#endif
|
||||
if (ecl_stringp(name) && (l = ecl_length(name))) {
|
||||
c = cl_char(name, ecl_make_fixnum(0));
|
||||
if (l == 1) {
|
||||
(void)0;
|
||||
|
|
@ -516,5 +530,5 @@ cl_name_char(cl_object name)
|
|||
}
|
||||
}
|
||||
}
|
||||
@(return c);
|
||||
ecl_return1(the_env, c);
|
||||
}
|
||||
|
|
|
|||
31
src/c/unicode/ucd_names.h
Normal file
31
src/c/unicode/ucd_names.h
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
* UNICODE NAMES DATABASE
|
||||
*/
|
||||
#ifndef ECL_UCD_NAMES_H
|
||||
#define ECL_UCD_NAMES_H 1
|
||||
|
||||
#define ECL_UCD_FIRST_PAIR 9699
|
||||
#define ECL_UCD_TOTAL_PAIRS 37993
|
||||
#define ECL_UCD_TOTAL_GROUPS 481
|
||||
#define ECL_UCD_LARGEST_CHAR_NAME 83
|
||||
#define ECL_UCD_TOTAL_NAMES 32914
|
||||
|
||||
typedef struct {
|
||||
unsigned char codes[4];
|
||||
} ecl_ucd_names_pair_type;
|
||||
|
||||
typedef struct {
|
||||
int smallest, largest, pair_code;
|
||||
} ecl_ucd_names_char_group;
|
||||
|
||||
typedef struct {
|
||||
unsigned char pair[2];
|
||||
unsigned char code[3];
|
||||
} ecl_ucd_code_and_pair;
|
||||
|
||||
extern const ecl_ucd_names_pair_type ecl_ucd_names_pair[ECL_UCD_TOTAL_PAIRS];
|
||||
extern const ecl_ucd_names_char_group ecl_ucd_names_char[ECL_UCD_TOTAL_GROUPS];
|
||||
extern const char *ecl_ucd_names_word[ECL_UCD_FIRST_PAIR];
|
||||
extern const ecl_ucd_code_and_pair ecl_ucd_sorted_pairs[ECL_UCD_TOTAL_NAMES];
|
||||
|
||||
#endif
|
||||
584
src/c/unicode/ucd_names_char.c
Normal file
584
src/c/unicode/ucd_names_char.c
Normal file
|
|
@ -0,0 +1,584 @@
|
|||
/*
|
||||
* Dictionary words.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <ecl/ecl.h>
|
||||
#include "ucd_names.h"
|
||||
|
||||
const ecl_ucd_names_char_group ecl_ucd_names_char[ECL_UCD_TOTAL_GROUPS] = {
|
||||
{32,126,9699}
|
||||
,{160,887,9794}
|
||||
,{890,894,10522}
|
||||
,{900,906,10527}
|
||||
,{908,908,10534}
|
||||
,{910,929,10535}
|
||||
,{931,1317,10555}
|
||||
,{1329,1366,10942}
|
||||
,{1369,1375,10980}
|
||||
,{1377,1415,10987}
|
||||
,{1417,1418,11026}
|
||||
,{1425,1479,11028}
|
||||
,{1488,1514,11083}
|
||||
,{1520,1524,11110}
|
||||
,{1536,1539,11115}
|
||||
,{1542,1563,11119}
|
||||
,{1566,1567,11141}
|
||||
,{1569,1630,11143}
|
||||
,{1632,1805,11205}
|
||||
,{1807,1866,11379}
|
||||
,{1869,1969,11439}
|
||||
,{1984,2042,11540}
|
||||
,{2048,2093,11599}
|
||||
,{2096,2110,11645}
|
||||
,{2304,2361,11660}
|
||||
,{2364,2382,11718}
|
||||
,{2384,2389,11737}
|
||||
,{2392,2418,11743}
|
||||
,{2425,2431,11770}
|
||||
,{2433,2435,11777}
|
||||
,{2437,2444,11780}
|
||||
,{2447,2448,11788}
|
||||
,{2451,2472,11790}
|
||||
,{2474,2480,11812}
|
||||
,{2482,2482,11819}
|
||||
,{2486,2489,11820}
|
||||
,{2492,2500,11824}
|
||||
,{2503,2504,11833}
|
||||
,{2507,2510,11835}
|
||||
,{2519,2519,11839}
|
||||
,{2524,2525,11840}
|
||||
,{2527,2531,11842}
|
||||
,{2534,2555,11847}
|
||||
,{2561,2563,11869}
|
||||
,{2565,2570,11872}
|
||||
,{2575,2576,11878}
|
||||
,{2579,2600,11880}
|
||||
,{2602,2608,11902}
|
||||
,{2610,2611,11909}
|
||||
,{2613,2614,11911}
|
||||
,{2616,2617,11913}
|
||||
,{2620,2620,11915}
|
||||
,{2622,2626,11916}
|
||||
,{2631,2632,11921}
|
||||
,{2635,2637,11923}
|
||||
,{2641,2641,11926}
|
||||
,{2649,2652,11927}
|
||||
,{2654,2654,11931}
|
||||
,{2662,2677,11932}
|
||||
,{2689,2691,11948}
|
||||
,{2693,2701,11951}
|
||||
,{2703,2705,11960}
|
||||
,{2707,2728,11963}
|
||||
,{2730,2736,11985}
|
||||
,{2738,2739,11992}
|
||||
,{2741,2745,11994}
|
||||
,{2748,2757,11999}
|
||||
,{2759,2761,12009}
|
||||
,{2763,2765,12012}
|
||||
,{2768,2768,12015}
|
||||
,{2784,2787,12016}
|
||||
,{2790,2799,12020}
|
||||
,{2801,2801,12030}
|
||||
,{2817,2819,12031}
|
||||
,{2821,2828,12034}
|
||||
,{2831,2832,12042}
|
||||
,{2835,2856,12044}
|
||||
,{2858,2864,12066}
|
||||
,{2866,2867,12073}
|
||||
,{2869,2873,12075}
|
||||
,{2876,2884,12080}
|
||||
,{2887,2888,12089}
|
||||
,{2891,2893,12091}
|
||||
,{2902,2903,12094}
|
||||
,{2908,2909,12096}
|
||||
,{2911,2915,12098}
|
||||
,{2918,2929,12103}
|
||||
,{2946,2947,12115}
|
||||
,{2949,2954,12117}
|
||||
,{2958,2960,12123}
|
||||
,{2962,2965,12126}
|
||||
,{2969,2970,12130}
|
||||
,{2972,2972,12132}
|
||||
,{2974,2975,12133}
|
||||
,{2979,2980,12135}
|
||||
,{2984,2986,12137}
|
||||
,{2990,3001,12140}
|
||||
,{3006,3010,12152}
|
||||
,{3014,3016,12157}
|
||||
,{3018,3021,12160}
|
||||
,{3024,3024,12164}
|
||||
,{3031,3031,12165}
|
||||
,{3046,3066,12166}
|
||||
,{3073,3075,12187}
|
||||
,{3077,3084,12190}
|
||||
,{3086,3088,12198}
|
||||
,{3090,3112,12201}
|
||||
,{3114,3123,12224}
|
||||
,{3125,3129,12234}
|
||||
,{3133,3140,12239}
|
||||
,{3142,3144,12247}
|
||||
,{3146,3149,12250}
|
||||
,{3157,3158,12254}
|
||||
,{3160,3161,12256}
|
||||
,{3168,3171,12258}
|
||||
,{3174,3183,12262}
|
||||
,{3192,3199,12272}
|
||||
,{3202,3203,12280}
|
||||
,{3205,3212,12282}
|
||||
,{3214,3216,12290}
|
||||
,{3218,3240,12293}
|
||||
,{3242,3251,12316}
|
||||
,{3253,3257,12326}
|
||||
,{3260,3268,12331}
|
||||
,{3270,3272,12340}
|
||||
,{3274,3277,12343}
|
||||
,{3285,3286,12347}
|
||||
,{3294,3294,12349}
|
||||
,{3296,3299,12350}
|
||||
,{3302,3311,12354}
|
||||
,{3313,3314,12364}
|
||||
,{3330,3331,12366}
|
||||
,{3333,3340,12368}
|
||||
,{3342,3344,12376}
|
||||
,{3346,3368,12379}
|
||||
,{3370,3385,12402}
|
||||
,{3389,3396,12418}
|
||||
,{3398,3400,12426}
|
||||
,{3402,3405,12429}
|
||||
,{3415,3415,12433}
|
||||
,{3424,3427,12434}
|
||||
,{3430,3445,12438}
|
||||
,{3449,3455,12454}
|
||||
,{3458,3459,12461}
|
||||
,{3461,3478,12463}
|
||||
,{3482,3505,12481}
|
||||
,{3507,3515,12505}
|
||||
,{3517,3517,12514}
|
||||
,{3520,3526,12515}
|
||||
,{3530,3530,12522}
|
||||
,{3535,3540,12523}
|
||||
,{3542,3542,12529}
|
||||
,{3544,3551,12530}
|
||||
,{3570,3572,12538}
|
||||
,{3585,3642,12541}
|
||||
,{3647,3675,12599}
|
||||
,{3713,3714,12628}
|
||||
,{3716,3716,12630}
|
||||
,{3719,3720,12631}
|
||||
,{3722,3722,12633}
|
||||
,{3725,3725,12634}
|
||||
,{3732,3735,12635}
|
||||
,{3737,3743,12639}
|
||||
,{3745,3747,12646}
|
||||
,{3749,3749,12649}
|
||||
,{3751,3751,12650}
|
||||
,{3754,3755,12651}
|
||||
,{3757,3769,12653}
|
||||
,{3771,3773,12666}
|
||||
,{3776,3780,12669}
|
||||
,{3782,3782,12674}
|
||||
,{3784,3789,12675}
|
||||
,{3792,3801,12681}
|
||||
,{3804,3805,12691}
|
||||
,{3840,3911,12693}
|
||||
,{3913,3948,12765}
|
||||
,{3953,3979,12801}
|
||||
,{3984,3991,12828}
|
||||
,{3993,4028,12836}
|
||||
,{4030,4044,12872}
|
||||
,{4046,4056,12887}
|
||||
,{4096,4293,12898}
|
||||
,{4304,4348,13096}
|
||||
,{4352,4680,13141}
|
||||
,{4682,4685,13470}
|
||||
,{4688,4694,13474}
|
||||
,{4696,4696,13481}
|
||||
,{4698,4701,13482}
|
||||
,{4704,4744,13486}
|
||||
,{4746,4749,13527}
|
||||
,{4752,4784,13531}
|
||||
,{4786,4789,13564}
|
||||
,{4792,4798,13568}
|
||||
,{4800,4800,13575}
|
||||
,{4802,4805,13576}
|
||||
,{4808,4822,13580}
|
||||
,{4824,4880,13595}
|
||||
,{4882,4885,13652}
|
||||
,{4888,4954,13656}
|
||||
,{4959,4988,13723}
|
||||
,{4992,5017,13753}
|
||||
,{5024,5108,13779}
|
||||
,{5120,5788,13864}
|
||||
,{5792,5872,14533}
|
||||
,{5888,5900,14614}
|
||||
,{5902,5908,14627}
|
||||
,{5920,5942,14634}
|
||||
,{5952,5971,14657}
|
||||
,{5984,5996,14677}
|
||||
,{5998,6000,14690}
|
||||
,{6002,6003,14693}
|
||||
,{6016,6109,14695}
|
||||
,{6112,6121,14789}
|
||||
,{6128,6137,14799}
|
||||
,{6144,6158,14809}
|
||||
,{6160,6169,14824}
|
||||
,{6176,6263,14834}
|
||||
,{6272,6314,14922}
|
||||
,{6320,6389,14965}
|
||||
,{6400,6428,15035}
|
||||
,{6432,6443,15064}
|
||||
,{6448,6459,15076}
|
||||
,{6464,6464,15088}
|
||||
,{6468,6509,15089}
|
||||
,{6512,6516,15131}
|
||||
,{6528,6571,15136}
|
||||
,{6576,6601,15180}
|
||||
,{6608,6618,15206}
|
||||
,{6622,6683,15217}
|
||||
,{6686,6750,15279}
|
||||
,{6752,6780,15344}
|
||||
,{6783,6793,15373}
|
||||
,{6800,6809,15384}
|
||||
,{6816,6829,15394}
|
||||
,{6912,6987,15408}
|
||||
,{6992,7036,15484}
|
||||
,{7040,7082,15529}
|
||||
,{7086,7097,15572}
|
||||
,{7168,7223,15584}
|
||||
,{7227,7241,15640}
|
||||
,{7245,7295,15655}
|
||||
,{7376,7410,15706}
|
||||
,{7424,7654,15741}
|
||||
,{7677,7957,15972}
|
||||
,{7960,7965,16253}
|
||||
,{7968,8005,16259}
|
||||
,{8008,8013,16297}
|
||||
,{8016,8023,16303}
|
||||
,{8025,8025,16311}
|
||||
,{8027,8027,16312}
|
||||
,{8029,8029,16313}
|
||||
,{8031,8061,16314}
|
||||
,{8064,8116,16345}
|
||||
,{8118,8132,16398}
|
||||
,{8134,8147,16413}
|
||||
,{8150,8155,16427}
|
||||
,{8157,8175,16433}
|
||||
,{8178,8180,16452}
|
||||
,{8182,8190,16455}
|
||||
,{8192,8292,16464}
|
||||
,{8298,8305,16565}
|
||||
,{8308,8334,16573}
|
||||
,{8336,8340,16600}
|
||||
,{8352,8376,16605}
|
||||
,{8400,8432,16630}
|
||||
,{8448,8585,16663}
|
||||
,{8592,9192,16801}
|
||||
,{9216,9254,17402}
|
||||
,{9280,9290,17441}
|
||||
,{9312,9933,17452}
|
||||
,{9935,9953,18074}
|
||||
,{9955,9955,18093}
|
||||
,{9960,9983,18094}
|
||||
,{9985,9988,18118}
|
||||
,{9990,9993,18122}
|
||||
,{9996,10023,18126}
|
||||
,{10025,10059,18154}
|
||||
,{10061,10061,18189}
|
||||
,{10063,10066,18190}
|
||||
,{10070,10078,18194}
|
||||
,{10081,10132,18203}
|
||||
,{10136,10159,18255}
|
||||
,{10161,10174,18279}
|
||||
,{10176,10186,18293}
|
||||
,{10188,10188,18304}
|
||||
,{10192,11084,18305}
|
||||
,{11088,11097,19198}
|
||||
,{11264,11310,19208}
|
||||
,{11312,11358,19255}
|
||||
,{11360,11505,19302}
|
||||
,{11513,11557,19448}
|
||||
,{11568,11621,19493}
|
||||
,{11631,11631,19547}
|
||||
,{11648,11670,19548}
|
||||
,{11680,11686,19571}
|
||||
,{11688,11694,19578}
|
||||
,{11696,11702,19585}
|
||||
,{11704,11710,19592}
|
||||
,{11712,11718,19599}
|
||||
,{11720,11726,19606}
|
||||
,{11728,11734,19613}
|
||||
,{11736,11742,19620}
|
||||
,{11744,11825,19627}
|
||||
,{11904,11929,19709}
|
||||
,{11931,12019,19735}
|
||||
,{12032,12245,19824}
|
||||
,{12272,12283,20038}
|
||||
,{12288,12351,20050}
|
||||
,{12353,12438,20114}
|
||||
,{12441,12543,20200}
|
||||
,{12549,12589,20303}
|
||||
,{12593,12686,20344}
|
||||
,{12688,12727,20438}
|
||||
,{12736,12771,20478}
|
||||
,{12784,12830,20514}
|
||||
,{12832,13054,20561}
|
||||
,{13056,13311,20784}
|
||||
,{19904,19967,21040}
|
||||
,{40960,42124,21104}
|
||||
,{42128,42182,22269}
|
||||
,{42192,42539,22324}
|
||||
,{42560,42591,22672}
|
||||
,{42594,42611,22704}
|
||||
,{42620,42647,22722}
|
||||
,{42656,42743,22750}
|
||||
,{42752,42892,22838}
|
||||
,{43003,43051,22979}
|
||||
,{43056,43065,23028}
|
||||
,{43072,43127,23038}
|
||||
,{43136,43204,23094}
|
||||
,{43214,43225,23163}
|
||||
,{43232,43259,23175}
|
||||
,{43264,43347,23203}
|
||||
,{43359,43388,23287}
|
||||
,{43392,43469,23317}
|
||||
,{43471,43481,23395}
|
||||
,{43486,43487,23406}
|
||||
,{43520,43574,23408}
|
||||
,{43584,43597,23463}
|
||||
,{43600,43609,23477}
|
||||
,{43612,43643,23487}
|
||||
,{43648,43714,23519}
|
||||
,{43739,43743,23586}
|
||||
,{43968,44013,23591}
|
||||
,{44016,44025,23637}
|
||||
,{44032,55203,23647}
|
||||
,{55216,55238,34819}
|
||||
,{55243,55291,34842}
|
||||
,{63744,64045,34891}
|
||||
,{64048,64109,35193}
|
||||
,{64112,64217,35255}
|
||||
,{64256,64262,35361}
|
||||
,{64275,64279,35368}
|
||||
,{64285,64310,35373}
|
||||
,{64312,64316,35399}
|
||||
,{64318,64318,35404}
|
||||
,{64320,64321,35405}
|
||||
,{64323,64324,35407}
|
||||
,{64326,64433,35409}
|
||||
,{64467,64831,35517}
|
||||
,{64848,64911,35882}
|
||||
,{64914,64967,35946}
|
||||
,{65008,65021,36000}
|
||||
,{65024,65049,36014}
|
||||
,{65056,65062,36040}
|
||||
,{65072,65106,36047}
|
||||
,{65108,65126,36082}
|
||||
,{65128,65131,36101}
|
||||
,{65136,65140,36105}
|
||||
,{65142,65276,36110}
|
||||
,{65279,65279,36245}
|
||||
,{65281,65470,36246}
|
||||
,{65474,65479,36436}
|
||||
,{65482,65487,36442}
|
||||
,{65490,65495,36448}
|
||||
,{65498,65500,36454}
|
||||
,{65504,65510,36457}
|
||||
,{65512,65518,36464}
|
||||
,{65529,65533,36471}
|
||||
,{65536,65547,36476}
|
||||
,{65549,65574,36488}
|
||||
,{65576,65594,36514}
|
||||
,{65596,65597,36533}
|
||||
,{65599,65613,36535}
|
||||
,{65616,65629,36550}
|
||||
,{65664,65786,36564}
|
||||
,{65792,65794,36687}
|
||||
,{65799,65843,36690}
|
||||
,{65847,65930,36735}
|
||||
,{65936,65947,36819}
|
||||
,{66000,66045,36831}
|
||||
,{66176,66204,36877}
|
||||
,{66208,66256,36906}
|
||||
,{66304,66334,36955}
|
||||
,{66336,66339,36986}
|
||||
,{66352,66378,36990}
|
||||
,{66432,66461,37017}
|
||||
,{66463,66499,37047}
|
||||
,{66504,66517,37084}
|
||||
,{66560,66717,37098}
|
||||
,{66720,66729,37256}
|
||||
,{67584,67589,37266}
|
||||
,{67592,67592,37272}
|
||||
,{67594,67637,37273}
|
||||
,{67639,67640,37317}
|
||||
,{67644,67644,37319}
|
||||
,{67647,67669,37320}
|
||||
,{67671,67679,37343}
|
||||
,{67840,67867,37352}
|
||||
,{67871,67897,37380}
|
||||
,{67903,67903,37407}
|
||||
,{68096,68099,37408}
|
||||
,{68101,68102,37412}
|
||||
,{68108,68115,37414}
|
||||
,{68117,68119,37422}
|
||||
,{68121,68147,37425}
|
||||
,{68152,68154,37452}
|
||||
,{68159,68167,37455}
|
||||
,{68176,68184,37464}
|
||||
,{68192,68223,37473}
|
||||
,{68352,68405,37505}
|
||||
,{68409,68437,37559}
|
||||
,{68440,68466,37588}
|
||||
,{68472,68479,37615}
|
||||
,{68608,68680,37623}
|
||||
,{69216,69246,37696}
|
||||
,{69760,69825,37727}
|
||||
,{73728,74606,37793}
|
||||
,{74752,74850,38672}
|
||||
,{74864,74867,38771}
|
||||
,{77824,78894,38775}
|
||||
,{118784,119029,39846}
|
||||
,{119040,119078,40092}
|
||||
,{119081,119261,40131}
|
||||
,{119296,119365,40312}
|
||||
,{119552,119638,40382}
|
||||
,{119648,119665,40469}
|
||||
,{119808,119892,40487}
|
||||
,{119894,119964,40572}
|
||||
,{119966,119967,40643}
|
||||
,{119970,119970,40645}
|
||||
,{119973,119974,40646}
|
||||
,{119977,119980,40648}
|
||||
,{119982,119993,40652}
|
||||
,{119995,119995,40664}
|
||||
,{119997,120003,40665}
|
||||
,{120005,120069,40672}
|
||||
,{120071,120074,40737}
|
||||
,{120077,120084,40741}
|
||||
,{120086,120092,40749}
|
||||
,{120094,120121,40756}
|
||||
,{120123,120126,40784}
|
||||
,{120128,120132,40788}
|
||||
,{120134,120134,40793}
|
||||
,{120138,120144,40794}
|
||||
,{120146,120485,40801}
|
||||
,{120488,120779,41141}
|
||||
,{120782,120831,41433}
|
||||
,{126976,127019,41483}
|
||||
,{127024,127123,41527}
|
||||
,{127232,127242,41627}
|
||||
,{127248,127278,41638}
|
||||
,{127281,127281,41669}
|
||||
,{127293,127293,41670}
|
||||
,{127295,127295,41671}
|
||||
,{127298,127298,41672}
|
||||
,{127302,127302,41673}
|
||||
,{127306,127310,41674}
|
||||
,{127319,127319,41679}
|
||||
,{127327,127327,41680}
|
||||
,{127353,127353,41681}
|
||||
,{127355,127356,41682}
|
||||
,{127359,127359,41684}
|
||||
,{127370,127373,41685}
|
||||
,{127376,127376,41689}
|
||||
,{127488,127488,41690}
|
||||
,{127504,127537,41691}
|
||||
,{127552,127560,41725}
|
||||
,{194560,195101,41734}
|
||||
,{917505,917505,42276}
|
||||
,{917536,917631,42277}
|
||||
};
|
||||
|
||||
static int
|
||||
search_pair(ecl_character c)
|
||||
{
|
||||
int mid, low = 0, up = ECL_UCD_TOTAL_GROUPS-1;
|
||||
do {
|
||||
mid = (up + low) / 2;
|
||||
if (c < ecl_ucd_names_char[mid].smallest)
|
||||
up = mid-1;
|
||||
else if (c > ecl_ucd_names_char[mid].largest)
|
||||
low = mid+1;
|
||||
else
|
||||
return (c - ecl_ucd_names_char[mid].smallest) +
|
||||
ecl_ucd_names_char[mid].pair_code;
|
||||
} while (low <= up && (low >= 0) && (up < ECL_UCD_TOTAL_GROUPS));
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void
|
||||
fill_pair_name(char *buffer, int pair)
|
||||
{
|
||||
if (pair < ECL_UCD_FIRST_PAIR) {
|
||||
strncat(buffer, ecl_ucd_names_word[pair], ECL_UCD_LARGEST_CHAR_NAME+1);
|
||||
/*
|
||||
printf("text=%s\n", ecl_ucd_names_word[pair]);
|
||||
*/
|
||||
} else {
|
||||
const ecl_ucd_names_pair_type p = ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR];
|
||||
/*
|
||||
printf("ndx=%d\n", pair - ECL_UCD_FIRST_PAIR);
|
||||
printf("c0=%d\n", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[0]);
|
||||
printf("c1=%d\n", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[1]);
|
||||
printf("c2=%d\n", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[2]);
|
||||
printf("c3=%d\n", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[3]);
|
||||
*/
|
||||
fill_pair_name(buffer, (((unsigned int)p.codes[1]) << 8) | p.codes[0]);
|
||||
fill_pair_name(buffer, (((unsigned int)p.codes[3]) << 8) | p.codes[2]);
|
||||
}
|
||||
}
|
||||
|
||||
cl_object
|
||||
_ecl_ucd_code_to_name(ecl_character c)
|
||||
{
|
||||
int pair = search_pair(c);
|
||||
if (pair < 0)
|
||||
return ECL_NIL;
|
||||
else {
|
||||
char buffer[ECL_UCD_LARGEST_CHAR_NAME+1];
|
||||
buffer[0] = 0;
|
||||
fill_pair_name(buffer, pair);
|
||||
return make_base_string_copy(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
cl_object
|
||||
_ecl_ucd_name_to_code(cl_object name)
|
||||
{
|
||||
int mid, low = 0, up = ECL_UCD_TOTAL_NAMES-1;
|
||||
int l = ecl_length(name);
|
||||
if (l <= ECL_UCD_LARGEST_CHAR_NAME) {
|
||||
char buffer1[ECL_UCD_LARGEST_CHAR_NAME+1];
|
||||
char buffer2[ECL_UCD_LARGEST_CHAR_NAME+1];
|
||||
for (mid = 0; mid < l; mid++) {
|
||||
ecl_character c = ecl_char_upcase(ecl_char(name, mid));
|
||||
buffer1[mid] = c;
|
||||
if (c < 32 || c > 127) /* All character names are [-A-Z_0-9]* */
|
||||
return ECL_NIL;
|
||||
}
|
||||
buffer1[mid] = 0;
|
||||
do {
|
||||
ecl_ucd_code_and_pair p = ecl_ucd_sorted_pairs[mid = (low + up) / 2];
|
||||
int flag, pair = ((unsigned int)p.pair[1] << 8) | p.pair[0];
|
||||
buffer2[0] = 0;
|
||||
fill_pair_name(buffer2, pair);
|
||||
flag = strcmp(buffer1, buffer2);
|
||||
/*
|
||||
printf("[%d,%d,%d] %s <> (%d)%s -> %d\n",
|
||||
low, mid, up, buffer1, pair, buffer2, flag);
|
||||
*/
|
||||
if (flag == 0) {
|
||||
return ecl_make_fixnum(((unsigned int)p.code[2] << 16) |
|
||||
((unsigned int)p.code[1] << 8) |
|
||||
p.code[0]);
|
||||
} else if (flag < 0) {
|
||||
up = mid - 1;
|
||||
} else {
|
||||
low = mid + 1;
|
||||
}
|
||||
} while (low <= up);
|
||||
}
|
||||
return ECL_NIL;
|
||||
}
|
||||
|
||||
32923
src/c/unicode/ucd_names_codes.c
Normal file
32923
src/c/unicode/ucd_names_codes.c
Normal file
File diff suppressed because it is too large
Load diff
38002
src/c/unicode/ucd_names_pair.c
Normal file
38002
src/c/unicode/ucd_names_pair.c
Normal file
File diff suppressed because it is too large
Load diff
9708
src/c/unicode/ucd_names_str.c
Normal file
9708
src/c/unicode/ucd_names_str.c
Normal file
File diff suppressed because it is too large
Load diff
17
src/configure
vendored
17
src/configure
vendored
|
|
@ -806,6 +806,7 @@ with_profile_cflags
|
|||
with_newcmp
|
||||
with_extra_files
|
||||
with_init_form
|
||||
with_unicode_names
|
||||
with_x
|
||||
'
|
||||
ac_precious_vars='build_alias
|
||||
|
|
@ -1522,6 +1523,7 @@ Optional Packages:
|
|||
--with-extra-files list of additional source files (default="")
|
||||
--with-init-form lisp forms to execute at startup
|
||||
(default="(si::top-level t)")
|
||||
--with-unicode-names link in the database of Unicode names (YES,no)
|
||||
--with-x use the X Window System
|
||||
|
||||
Some influential environment variables:
|
||||
|
|
@ -2989,6 +2991,15 @@ else
|
|||
fi
|
||||
|
||||
|
||||
|
||||
# Check whether --with-unicode-names was given.
|
||||
if test "${with_unicode_names+set}" = set; then :
|
||||
withval=$with_unicode_names;
|
||||
else
|
||||
with_unicode_names="yes"
|
||||
fi
|
||||
|
||||
|
||||
ecldir="${libdir}/ecl-${PACKAGE_VERSION}"
|
||||
|
||||
test -z "${docdir}" && docdir="${datadir}/doc/ecl-${PACKAGE_VERSION}"
|
||||
|
|
@ -9403,6 +9414,12 @@ $as_echo "#define ECL_UNICODE 21" >>confdefs.h
|
|||
ECL_CHARACTER=$ECL_INT32_T
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd.o unicode/ucd-0000.o unicode/ucd-0016.o unicode/ucd-0032.o unicode/ucd-0048.o unicode/ucd-0064.o unicode/ucd-0080.o unicode/ucd-0096.o"
|
||||
fi
|
||||
if test "${with_unicode_names}" = "yes"; then
|
||||
|
||||
$as_echo "#define ECL_UNICODE_NAMES 1" >>confdefs.h
|
||||
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd_names_char.o unicode/ucd_names_codes.o unicode/ucd_names_pair.o unicode/ucd_names_str.o"
|
||||
fi
|
||||
else
|
||||
CHAR_CODE_LIMIT=256
|
||||
ECL_CHARACTER="int"
|
||||
|
|
|
|||
|
|
@ -294,6 +294,12 @@ AC_ARG_WITH(init-form,
|
|||
[lisp forms to execute at startup (default="(si::top-level t)")]),
|
||||
[with_init_form="${withval}"], [with_init_form=""])
|
||||
|
||||
AC_ARG_WITH(unicode-names,
|
||||
AS_HELP_STRING( [--with-unicode-names],
|
||||
[link in the database of Unicode names]
|
||||
[(YES,no)]),
|
||||
[],[with_unicode_names="yes"])
|
||||
|
||||
dnl -----------------------------------------------------------------------
|
||||
dnl Installation directories
|
||||
ecldir="${libdir}/ecl-${PACKAGE_VERSION}"
|
||||
|
|
@ -852,6 +858,10 @@ if test "x${enable_unicode}" != "xno"; then
|
|||
ECL_CHARACTER=$ECL_INT32_T
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd.o unicode/ucd-0000.o unicode/ucd-0016.o unicode/ucd-0032.o unicode/ucd-0048.o unicode/ucd-0064.o unicode/ucd-0080.o unicode/ucd-0096.o"
|
||||
fi
|
||||
if test "${with_unicode_names}" = "yes"; then
|
||||
AC_DEFINE(ECL_UNICODE_NAMES, [1], [Link in the database of Unicode names])
|
||||
EXTRA_OBJS="$EXTRA_OBJS unicode/ucd_names_char.o unicode/ucd_names_codes.o unicode/ucd_names_pair.o unicode/ucd_names_str.o"
|
||||
fi
|
||||
else
|
||||
CHAR_CODE_LIMIT=256
|
||||
ECL_CHARACTER="int"
|
||||
|
|
|
|||
|
|
@ -102,6 +102,8 @@
|
|||
|
||||
/* Support for Unicode strings */
|
||||
#undef ECL_UNICODE
|
||||
/* Link in the Unicode names for all characters (takes ~0.5 Mb) */
|
||||
#undef ECL_UNICODE_NAMES
|
||||
|
||||
/* Allow STREAM operations to work on arbitrary objects */
|
||||
#undef ECL_CLOS_STREAMS
|
||||
|
|
|
|||
|
|
@ -1901,6 +1901,10 @@ extern ECL_API cl_object si_coerce_to_base_string(cl_object x);
|
|||
extern ECL_API cl_object si_coerce_to_extended_string(cl_object x);
|
||||
#define ecl_alloc_simple_extended_string(l) ecl_alloc_simple_vector((l),ecl_aet_ch)
|
||||
extern ECL_API cl_object ecl_alloc_adjustable_extended_string(cl_index l);
|
||||
# ifdef ECL_UNICODE_NAMES
|
||||
extern ECL_API cl_object _ecl_ucd_code_to_name(ecl_character c);
|
||||
extern ECL_API cl_object _ecl_ucd_name_to_code(cl_object name);
|
||||
# endif
|
||||
#else
|
||||
#define si_base_char_p cl_characterp
|
||||
#define si_base_string_p cl_stringp
|
||||
|
|
|
|||
|
|
@ -1428,6 +1428,7 @@ package."
|
|||
(*print-readably* nil)
|
||||
(*print-pretty* nil)
|
||||
(*print-circle* t)
|
||||
(*print-length* 2)
|
||||
(*readtable* (or *break-readtable* *readtable*))
|
||||
(*break-message* (format nil "~&Condition of type: ~A~%~A~%"
|
||||
(type-of condition) condition))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue