mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2026-03-09 14:40:37 -07:00
Implemented custom encodings via invertible mappings.
This commit is contained in:
parent
6e5902bd08
commit
944ffddb2e
4 changed files with 94 additions and 14 deletions
|
|
@ -25,9 +25,9 @@ ECL 9.1.0:
|
|||
as a base-string.
|
||||
|
||||
- ECL supports external formats. They may be a symbol, denoting the encoding
|
||||
or an encoding option, or a list of symbols. Valid symbols are :DEFAULT,
|
||||
:LATIN-1, :ISO-8859-1, :UTF-8, :UCS-{2,4}{,BE,LE} :CR, :LF and :CRLF. Default
|
||||
option is :LF.
|
||||
or an encoding option, an association table between bytes and unicode codes
|
||||
or a list of these. Valid symbols are :DEFAULT, :LATIN-1, :ISO-8859-1,
|
||||
:UTF-8, :UCS-{2,4}{,BE,LE} :CR, :LF and :CRLF. Default option is :LF.
|
||||
|
||||
* Bugs fixed:
|
||||
|
||||
|
|
|
|||
66
src/c/file.d
66
src/c/file.d
|
|
@ -725,6 +725,56 @@ ucs_2_encoder(cl_object stream, unsigned char *buffer, int c)
|
|||
return 2 + ucs_2be_encoder(stream, buffer+2, c);
|
||||
}
|
||||
|
||||
/*
|
||||
* UCS-2 BOM ENDIAN
|
||||
*/
|
||||
|
||||
static int
|
||||
user_decoder(cl_object stream, cl_eformat_read_byte8 read_byte8, cl_object source)
|
||||
{
|
||||
cl_object table = stream->stream.format_table;
|
||||
cl_object character, byte;
|
||||
unsigned char buffer[2];
|
||||
if (read_byte8(source, buffer, 1) < 1) {
|
||||
return EOF;
|
||||
}
|
||||
byte = MAKE_FIXNUM(buffer);
|
||||
character = ecl_gethash_safe(MAKE_FIXNUM(buffer[0]), table, Cnil);
|
||||
if (Null(character)) {
|
||||
return EOF-2;
|
||||
}
|
||||
if (character == Ct) {
|
||||
if (read_byte8(source, buffer+1, 1) < 1) {
|
||||
return EOF;
|
||||
}
|
||||
character = ecl_gethash_safe(MAKE_FIXNUM((buffer[0]<<8+buffer[1])),
|
||||
table, Cnil);
|
||||
if (Null(character)) {
|
||||
return EOF-2;
|
||||
}
|
||||
}
|
||||
return CHAR_CODE(character);
|
||||
}
|
||||
|
||||
static int
|
||||
user_encoder(cl_object stream, unsigned char *buffer, int c)
|
||||
{
|
||||
cl_object byte = ecl_gethash_safe(CODE_CHAR(c), stream->stream.format_table, Cnil);
|
||||
if (Null(byte)) {
|
||||
return 0;
|
||||
} else {
|
||||
cl_fixnum code = fix(byte);
|
||||
if (c > 0xFF) {
|
||||
buffer[1] = c & 8; c >>= 8;
|
||||
buffer[0] = c;
|
||||
return 2;
|
||||
} else {
|
||||
buffer[0] = c;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* UTF-8
|
||||
*/
|
||||
|
|
@ -2519,15 +2569,14 @@ const struct ecl_file_ops input_file_ops = {
|
|||
|
||||
|
||||
static int
|
||||
parse_external_format(cl_object strm, cl_object format)
|
||||
parse_external_format(cl_object stream, cl_object format)
|
||||
{
|
||||
if (CONSP(format)) {
|
||||
int flags = 0;
|
||||
do {
|
||||
flags |= parse_external_format(strm, ECL_CONS_CAR(format));
|
||||
flags |= parse_external_format(stream, ECL_CONS_CAR(format));
|
||||
format = cl_cdr(format);
|
||||
} while (CONSP(format));
|
||||
printf("%d\n", flags);
|
||||
return flags;
|
||||
}
|
||||
if (FIXNUMP(format)) {
|
||||
|
|
@ -2582,6 +2631,10 @@ parse_external_format(cl_object strm, cl_object format)
|
|||
/* Binary stream */
|
||||
return 0;
|
||||
}
|
||||
if (type_of(format) == t_hashtable) {
|
||||
stream->stream.format_table = format;
|
||||
return ECL_STREAM_USER_FORMAT;
|
||||
}
|
||||
FEerror("Unknown external format: ~A", 1, format);
|
||||
return ECL_STREAM_DEFAULT_FORMAT;
|
||||
}
|
||||
|
|
@ -2668,6 +2721,13 @@ set_stream_elt_type(cl_object stream, cl_fixnum byte_size, int flags,
|
|||
stream->stream.decoder = ucs_4le_decoder;
|
||||
break;
|
||||
#endif
|
||||
case ECL_STREAM_USER_FORMAT:
|
||||
IO_STREAM_ELT_TYPE(stream) = @'character';
|
||||
byte_size = 8;
|
||||
stream->stream.format = stream->stream.format_table;
|
||||
stream->stream.encoder = user_encoder;
|
||||
stream->stream.decoder = user_decoder;
|
||||
break;
|
||||
default:
|
||||
FEerror("Invalid external format code ~D with ~A",
|
||||
1, MAKE_FIXNUM(flags), external_format);
|
||||
|
|
|
|||
|
|
@ -526,12 +526,12 @@ enum {
|
|||
ECL_STREAM_UCS_4 = 6,
|
||||
ECL_STREAM_UCS_4LE = 7,
|
||||
ECL_STREAM_UCS_4BE = 8,
|
||||
ECL_STREAM_8BIT = 5,
|
||||
ECL_STREAM_CR = 8,
|
||||
ECL_STREAM_LF = 16,
|
||||
ECL_STREAM_SIGNED_BYTES = 32,
|
||||
ECL_STREAM_C_STREAM = 64,
|
||||
ECL_STREAM_MIGHT_SEEK = 128
|
||||
ECL_STREAM_USER_FORMAT = 9,
|
||||
ECL_STREAM_CR = 16,
|
||||
ECL_STREAM_LF = 32,
|
||||
ECL_STREAM_SIGNED_BYTES = 64,
|
||||
ECL_STREAM_C_STREAM = 128,
|
||||
ECL_STREAM_MIGHT_SEEK = 256
|
||||
};
|
||||
|
||||
typedef int (*cl_eformat_encoder)(cl_object stream, unsigned char *buffer, int c);
|
||||
|
|
@ -556,8 +556,7 @@ struct ecl_stream {
|
|||
cl_object format; /* external format */
|
||||
cl_eformat_encoder encoder;
|
||||
cl_eformat_decoder decoder;
|
||||
cl_object encoder_table;
|
||||
cl_object decoder_table;
|
||||
cl_object format_table;
|
||||
int flags; /* character table, flags, etc */
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -287,3 +287,24 @@ the one defined in the ANSI standard. *print-base* is 10, *print-array* is t,
|
|||
(print-unreadable-object-function
|
||||
,object ,stream ,type ,identity #'.print-unreadable-object-body.))
|
||||
`(print-unreadable-object-function ,object ,stream ,type ,identity nil)))
|
||||
|
||||
(defun ext::load-encoding (name)
|
||||
(let ((filename (make-pathname :name (symbol-name name) :defaults "SYS:encodings;")))
|
||||
(print name)
|
||||
(unless (probe-file filename)
|
||||
(error "Unable to find mapping file ~A for encoding ~A" filename name))
|
||||
(with-open-file (s filename :direction :input)
|
||||
(read s))))
|
||||
|
||||
(defun ext::make-encoding (mapping)
|
||||
(when (symbolp mapping)
|
||||
(setf mapping (load-encoding mapping)))
|
||||
(let ((output (make-hash-table :size 512 :test 'eq)))
|
||||
(dolist (record mapping output)
|
||||
(let* ((byte (car record))
|
||||
(unicode (cdr record))
|
||||
(unicode-char (code-char unicode)))
|
||||
(when (> byte #xFF)
|
||||
(setf (gethash (ash byte -8) output) t))
|
||||
(setf (gethash byte output) unicode-char)
|
||||
(setf (gethash unicode-char output) byte)))))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue