mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2025-12-15 15:21:03 -08:00
298 lines
9.5 KiB
Common Lisp
298 lines
9.5 KiB
Common Lisp
(defparameter *destination*
|
|
(merge-pathnames "../../src/c/unicode/"
|
|
(or *load-truename* *compile-pathname*)))
|
|
|
|
(let* ((translated-data (copy-tree *compressed-data*))
|
|
(pairs (copy-tree *paired-data*))
|
|
(first-code (loop for (pair-code . pair) in pairs minimize pair-code))
|
|
(last-code (loop for (pair-code . pair) in pairs maximize pair-code)))
|
|
;;
|
|
;; We make sure that for each character there is a unique pair which is not
|
|
;; used anywhere else
|
|
;;
|
|
(loop with used-code = (make-array (1+ last-code) :initial-element nil)
|
|
for line in translated-data
|
|
for pair-code = (third line)
|
|
do (cond ((/= (length line) 3)
|
|
(error "Error in compressed data: too long code ~A" line))
|
|
((or (aref used-code pair-code)
|
|
(< pair-code first-code))
|
|
(let ((new-pair (cons pair-code 0)))
|
|
(setf pairs (acons (incf last-code) new-pair pairs)
|
|
(third line) last-code)))
|
|
(t
|
|
(setf (aref used-code pair-code) t))))
|
|
;;
|
|
;; We now renumber all pairs.
|
|
;;
|
|
(let ((translation-table (make-array (1+ last-code) :initial-element nil))
|
|
(counter -1))
|
|
(flet ((add-code (code)
|
|
(or (aref translation-table code)
|
|
(setf (aref translation-table code) (incf counter))))
|
|
(translate (old-code)
|
|
(or (aref translation-table old-code)
|
|
(error "Unknown code ~A" old-code))))
|
|
;; First of all we add the words
|
|
(loop for i from 0 below first-code
|
|
do (add-code i))
|
|
;; Then we add all pairs that represent characters, so that they
|
|
;; are consecutive, too.
|
|
(loop for line in translated-data
|
|
do (setf (third line) (add-code (third line))))
|
|
;; Finally, we add the remaining pairs
|
|
(loop for record in pairs
|
|
do (setf (car record) (add-code (car record))))
|
|
;; ... and we fix the definitions
|
|
(loop for (code . pair) in pairs
|
|
do (setf (car pair) (translate (car pair))
|
|
(cdr pair) (translate (cdr pair))))))
|
|
(defparameter *sorted-compressed-data* translated-data)
|
|
(defparameter *sorted-pairs* (sort pairs #'< :key #'car))
|
|
(print 'finished)
|
|
)
|
|
|
|
(defparameter *grouped-characters*
|
|
(loop with last-ucd-code = nil
|
|
with start-ucd-code = nil
|
|
with start-code = nil
|
|
with output = '()
|
|
with aux = '()
|
|
for n from (third (first *sorted-compressed-data*))
|
|
for line in *sorted-compressed-data*
|
|
for (ucd-code name code) = line
|
|
do (cond ((/= code n)
|
|
(error "Codes in *sorted-compressed-data* are not consecutive:~%~A"
|
|
(cons line (subseq aux 0 10))))
|
|
((null start-ucd-code)
|
|
(setf start-ucd-code ucd-code
|
|
start-code code))
|
|
((= last-ucd-code (1- ucd-code))
|
|
)
|
|
(t
|
|
(push (list start-ucd-code last-ucd-code start-code)
|
|
output)
|
|
(setf start-ucd-code ucd-code
|
|
start-code code)))
|
|
(setf last-ucd-code ucd-code aux (cons line aux))
|
|
finally (return (nreverse output))))
|
|
|
|
(with-open-file (s (merge-pathnames "ucd_names.h" *destination*)
|
|
:direction :output
|
|
:if-exists :supersede)
|
|
(format s "/*
|
|
* UNICODE NAMES DATABASE
|
|
*/
|
|
#ifndef ECL_UCD_NAMES_H
|
|
#define ECL_UCD_NAMES_H 1
|
|
|
|
#define ECL_UCD_FIRST_PAIR ~D
|
|
#define ECL_UCD_TOTAL_PAIRS ~D
|
|
#define ECL_UCD_TOTAL_GROUPS ~D
|
|
#define ECL_UCD_LARGEST_CHAR_NAME ~D
|
|
#define ECL_UCD_TOTAL_NAMES ~D
|
|
|
|
typedef struct {
|
|
unsigned char codes[4];
|
|
} ecl_ucd_names_pair_type;
|
|
|
|
typedef struct {
|
|
int smallest, largest, pair_code;
|
|
} ecl_ucd_names_char_group;
|
|
|
|
typedef struct {
|
|
unsigned char pair[2];
|
|
unsigned char code[3];
|
|
} ecl_ucd_code_and_pair;
|
|
|
|
extern const ecl_ucd_names_pair_type ecl_ucd_names_pair[ECL_UCD_TOTAL_PAIRS];
|
|
extern const ecl_ucd_names_char_group ecl_ucd_names_char[ECL_UCD_TOTAL_GROUPS];
|
|
extern const char *ecl_ucd_names_word[ECL_UCD_FIRST_PAIR];
|
|
extern const ecl_ucd_code_and_pair ecl_ucd_sorted_pairs[ECL_UCD_TOTAL_NAMES];
|
|
|
|
#endif
|
|
"
|
|
(1+ *last-word-index*)
|
|
(length *sorted-pairs*)
|
|
(length *grouped-characters*)
|
|
(loop for (code name . rest) in *compressed-data*
|
|
maximize (length name))
|
|
(length *compressed-data*)
|
|
))
|
|
|
|
(with-open-file (s (merge-pathnames "ucd_names_pair.c" *destination*)
|
|
:direction :output
|
|
:if-exists :supersede)
|
|
(format s "/*
|
|
* Pairs of symbols.
|
|
*/
|
|
|
|
#include <ecl/ecl.h>
|
|
#include \"ucd_names.h\"
|
|
|
|
const ecl_ucd_names_pair_type ecl_ucd_names_pair[ECL_UCD_TOTAL_PAIRS] = {
|
|
"
|
|
(length *sorted-pairs*) (length *sorted-pairs*))
|
|
(loop for i from 0
|
|
for (pair-code . (a . b)) in *sorted-pairs*
|
|
do (format s "~A{~D, ~D, ~D, ~D}~%"
|
|
(if (plusp i) "," "")
|
|
(logand a #xff) (ash a -8)
|
|
(logand b #xff) (ash b -8)
|
|
))
|
|
(format s "};~%"))
|
|
|
|
(with-open-file (s (merge-pathnames "ucd_names_codes.c" *destination*)
|
|
:direction :output
|
|
:if-exists :supersede)
|
|
(format s "/*
|
|
* Sorted character names.
|
|
*/
|
|
|
|
#include <ecl/ecl.h>
|
|
#include \"ucd_names.h\"
|
|
|
|
const ecl_ucd_code_and_pair ecl_ucd_sorted_pairs[ECL_UCD_TOTAL_NAMES] = {
|
|
")
|
|
(loop with l = (sort (copy-tree *sorted-compressed-data*) #'string<= :key #'second)
|
|
for (ucd-code name code) in l
|
|
for i from 0
|
|
do (format s "~A{{~D, ~D}, {~D, ~D, ~D}}~%"
|
|
(if (plusp i) "," "")
|
|
(logand code #xff) (ash code -8)
|
|
(logand ucd-code #xff) (logand (ash ucd-code -8) #xff)
|
|
(logand (ash ucd-code -16) #xff)))
|
|
(format s "};"))
|
|
|
|
(with-open-file (s (merge-pathnames "ucd_names_str.c" *destination*)
|
|
:direction :output
|
|
:if-exists :supersede)
|
|
(format s "/*
|
|
* Dictionary words.
|
|
*/
|
|
|
|
#include <ecl/ecl.h>
|
|
#include \"ucd_names.h\"
|
|
|
|
const char *ecl_ucd_names_word[ECL_UCD_FIRST_PAIR] = {
|
|
")
|
|
(loop for i from 0
|
|
for c across *words-array*
|
|
do (format s "~A~S~%" (if (plusp i) "," "") (or c "")))
|
|
(format s "};~%"))
|
|
|
|
(with-open-file (s (merge-pathnames "ucd_names_char.c" *destination*)
|
|
:direction :output
|
|
:if-exists :supersede)
|
|
(format s "/*
|
|
* Dictionary words.
|
|
*/
|
|
|
|
#include <string.h>
|
|
#include <ecl/ecl.h>
|
|
#include \"ucd_names.h\"
|
|
|
|
const ecl_ucd_names_char_group ecl_ucd_names_char[ECL_UCD_TOTAL_GROUPS] = {
|
|
"
|
|
(length *grouped-characters*))
|
|
(loop for i from 0
|
|
for (start end pair-code) in *grouped-characters*
|
|
do (format s "~A{~D,~D,~D}~%" (if (plusp i) "," "")
|
|
start end pair-code))
|
|
(format s "};
|
|
|
|
static int
|
|
search_pair(ecl_character c)
|
|
{
|
|
int mid, low = 0, up = ECL_UCD_TOTAL_GROUPS-1;
|
|
do {
|
|
mid = (up + low) / 2;
|
|
if (c < ecl_ucd_names_char[mid].smallest)
|
|
up = mid-1;
|
|
else if (c > ecl_ucd_names_char[mid].largest)
|
|
low = mid+1;
|
|
else
|
|
return (c - ecl_ucd_names_char[mid].smallest) +
|
|
ecl_ucd_names_char[mid].pair_code;
|
|
} while (low <= up && (low >= 0) && (up < ECL_UCD_TOTAL_GROUPS));
|
|
return -1;
|
|
}
|
|
|
|
static void
|
|
fill_pair_name(char *buffer, int pair)
|
|
{
|
|
if (pair < ECL_UCD_FIRST_PAIR) {
|
|
strncat(buffer, ecl_ucd_names_word[pair], ECL_UCD_LARGEST_CHAR_NAME+1);
|
|
/*
|
|
printf(\"text=%s\\n\", ecl_ucd_names_word[pair]);
|
|
*/
|
|
} else {
|
|
const ecl_ucd_names_pair_type p = ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR];
|
|
/*
|
|
printf(\"ndx=%d\\n\", pair - ECL_UCD_FIRST_PAIR);
|
|
printf(\"c0=%d\\n\", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[0]);
|
|
printf(\"c1=%d\\n\", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[1]);
|
|
printf(\"c2=%d\\n\", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[2]);
|
|
printf(\"c3=%d\\n\", ecl_ucd_names_pair[pair - ECL_UCD_FIRST_PAIR].codes[3]);
|
|
*/
|
|
fill_pair_name(buffer, (((unsigned int)p.codes[1]) << 8) | p.codes[0]);
|
|
fill_pair_name(buffer, (((unsigned int)p.codes[3]) << 8) | p.codes[2]);
|
|
}
|
|
}
|
|
|
|
cl_object
|
|
_ecl_ucd_code_to_name(ecl_character c)
|
|
{
|
|
int pair = search_pair(c);
|
|
if (pair < 0)
|
|
return ECL_NIL;
|
|
else {
|
|
char buffer[ECL_UCD_LARGEST_CHAR_NAME+1];
|
|
buffer[0] = 0;
|
|
fill_pair_name(buffer, pair);
|
|
return make_base_string_copy(buffer);
|
|
}
|
|
}
|
|
|
|
cl_object
|
|
_ecl_ucd_name_to_code(cl_object name)
|
|
{
|
|
int mid, low = 0, up = ECL_UCD_TOTAL_NAMES-1;
|
|
int l = ecl_length(name);
|
|
if (l <= ECL_UCD_LARGEST_CHAR_NAME) {
|
|
char buffer1[ECL_UCD_LARGEST_CHAR_NAME+1];
|
|
char buffer2[ECL_UCD_LARGEST_CHAR_NAME+1];
|
|
for (mid = 0; mid < l; mid++) {
|
|
ecl_character c = ecl_char_upcase(ecl_char(name, mid));
|
|
buffer1[mid] = c;
|
|
if (c < 32 || c > 127) /* All character names are [-A-Z_0-9]* */
|
|
return ECL_NIL;
|
|
}
|
|
buffer1[mid] = 0;
|
|
do {
|
|
ecl_ucd_code_and_pair p = ecl_ucd_sorted_pairs[mid = (low + up) / 2];
|
|
int flag, pair = ((unsigned int)p.pair[1] << 8) | p.pair[0];
|
|
buffer2[0] = 0;
|
|
fill_pair_name(buffer2, pair);
|
|
flag = strcmp(buffer1, buffer2);
|
|
/*
|
|
printf(\"[%d,%d,%d] %s <> (%d)%s -> %d\\n\",
|
|
low, mid, up, buffer1, pair, buffer2, flag);
|
|
*/
|
|
if (flag == 0) {
|
|
return ecl_make_fixnum(((unsigned int)p.code[2] << 16) |
|
|
((unsigned int)p.code[1] << 8) |
|
|
p.code[0]);
|
|
} else if (flag < 0) {
|
|
up = mid - 1;
|
|
} else {
|
|
low = mid + 1;
|
|
}
|
|
} while (low <= up);
|
|
}
|
|
return ECL_NIL;
|
|
}
|
|
|
|
"))
|
|
|
|
;(ext:run-program "/bin/sh" '("-c" "cp *.c *.h ~/devel/ecl/src/c/unicode/"))
|