Implemented custom encodings via invertible mappings.

This commit is contained in:
Juan Jose Garcia Ripoll 2009-01-02 19:40:53 +01:00
parent 6e5902bd08
commit 944ffddb2e
4 changed files with 94 additions and 14 deletions

View file

@ -25,9 +25,9 @@ ECL 9.1.0:
as a base-string.
- ECL supports external formats. They may be a symbol, denoting the encoding
or an encoding option, or a list of symbols. Valid symbols are :DEFAULT,
:LATIN-1, :ISO-8859-1, :UTF-8, :UCS-{2,4}{,BE,LE} :CR, :LF and :CRLF. Default
option is :LF.
or an encoding option, an association table between bytes and unicode codes
or a list of these. Valid symbols are :DEFAULT, :LATIN-1, :ISO-8859-1,
:UTF-8, :UCS-{2,4}{,BE,LE} :CR, :LF and :CRLF. Default option is :LF.
* Bugs fixed:

View file

@ -725,6 +725,56 @@ ucs_2_encoder(cl_object stream, unsigned char *buffer, int c)
return 2 + ucs_2be_encoder(stream, buffer+2, c);
}
/*
* UCS-2 BOM ENDIAN
*/
static int
user_decoder(cl_object stream, cl_eformat_read_byte8 read_byte8, cl_object source)
{
cl_object table = stream->stream.format_table;
cl_object character, byte;
unsigned char buffer[2];
if (read_byte8(source, buffer, 1) < 1) {
return EOF;
}
byte = MAKE_FIXNUM(buffer);
character = ecl_gethash_safe(MAKE_FIXNUM(buffer[0]), table, Cnil);
if (Null(character)) {
return EOF-2;
}
if (character == Ct) {
if (read_byte8(source, buffer+1, 1) < 1) {
return EOF;
}
character = ecl_gethash_safe(MAKE_FIXNUM((buffer[0]<<8+buffer[1])),
table, Cnil);
if (Null(character)) {
return EOF-2;
}
}
return CHAR_CODE(character);
}
static int
user_encoder(cl_object stream, unsigned char *buffer, int c)
{
cl_object byte = ecl_gethash_safe(CODE_CHAR(c), stream->stream.format_table, Cnil);
if (Null(byte)) {
return 0;
} else {
cl_fixnum code = fix(byte);
if (c > 0xFF) {
buffer[1] = c & 8; c >>= 8;
buffer[0] = c;
return 2;
} else {
buffer[0] = c;
return 1;
}
}
}
/*
* UTF-8
*/
@ -2519,15 +2569,14 @@ const struct ecl_file_ops input_file_ops = {
static int
parse_external_format(cl_object strm, cl_object format)
parse_external_format(cl_object stream, cl_object format)
{
if (CONSP(format)) {
int flags = 0;
do {
flags |= parse_external_format(strm, ECL_CONS_CAR(format));
flags |= parse_external_format(stream, ECL_CONS_CAR(format));
format = cl_cdr(format);
} while (CONSP(format));
printf("%d\n", flags);
return flags;
}
if (FIXNUMP(format)) {
@ -2582,6 +2631,10 @@ parse_external_format(cl_object strm, cl_object format)
/* Binary stream */
return 0;
}
if (type_of(format) == t_hashtable) {
stream->stream.format_table = format;
return ECL_STREAM_USER_FORMAT;
}
FEerror("Unknown external format: ~A", 1, format);
return ECL_STREAM_DEFAULT_FORMAT;
}
@ -2668,6 +2721,13 @@ set_stream_elt_type(cl_object stream, cl_fixnum byte_size, int flags,
stream->stream.decoder = ucs_4le_decoder;
break;
#endif
case ECL_STREAM_USER_FORMAT:
IO_STREAM_ELT_TYPE(stream) = @'character';
byte_size = 8;
stream->stream.format = stream->stream.format_table;
stream->stream.encoder = user_encoder;
stream->stream.decoder = user_decoder;
break;
default:
FEerror("Invalid external format code ~D with ~A",
1, MAKE_FIXNUM(flags), external_format);

View file

@ -526,12 +526,12 @@ enum {
ECL_STREAM_UCS_4 = 6,
ECL_STREAM_UCS_4LE = 7,
ECL_STREAM_UCS_4BE = 8,
ECL_STREAM_8BIT = 5,
ECL_STREAM_CR = 8,
ECL_STREAM_LF = 16,
ECL_STREAM_SIGNED_BYTES = 32,
ECL_STREAM_C_STREAM = 64,
ECL_STREAM_MIGHT_SEEK = 128
ECL_STREAM_USER_FORMAT = 9,
ECL_STREAM_CR = 16,
ECL_STREAM_LF = 32,
ECL_STREAM_SIGNED_BYTES = 64,
ECL_STREAM_C_STREAM = 128,
ECL_STREAM_MIGHT_SEEK = 256
};
typedef int (*cl_eformat_encoder)(cl_object stream, unsigned char *buffer, int c);
@ -556,8 +556,7 @@ struct ecl_stream {
cl_object format; /* external format */
cl_eformat_encoder encoder;
cl_eformat_decoder decoder;
cl_object encoder_table;
cl_object decoder_table;
cl_object format_table;
int flags; /* character table, flags, etc */
};

View file

@ -287,3 +287,24 @@ the one defined in the ANSI standard. *print-base* is 10, *print-array* is t,
(print-unreadable-object-function
,object ,stream ,type ,identity #'.print-unreadable-object-body.))
`(print-unreadable-object-function ,object ,stream ,type ,identity nil)))
(defun ext::load-encoding (name)
(let ((filename (make-pathname :name (symbol-name name) :defaults "SYS:encodings;")))
(print name)
(unless (probe-file filename)
(error "Unable to find mapping file ~A for encoding ~A" filename name))
(with-open-file (s filename :direction :input)
(read s))))
(defun ext::make-encoding (mapping)
(when (symbolp mapping)
(setf mapping (load-encoding mapping)))
(let ((output (make-hash-table :size 512 :test 'eq)))
(dolist (record mapping output)
(let* ((byte (car record))
(unicode (cdr record))
(unicode-char (code-char unicode)))
(when (> byte #xFF)
(setf (gethash (ash byte -8) output) t))
(setf (gethash byte output) unicode-char)
(setf (gethash unicode-char output) byte)))))