mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2025-12-05 18:30:24 -08:00
streams: add binary encoders and decoders to the mix
This will allow us to transcode characters to bytes and vice versa. This is necessary to implement conductive UNREAD-CHAR and UNREAD-BYTE, but will also allow us to add low-level parsers for binary objects in the future.
This commit is contained in:
parent
5fe96b8339
commit
ba422ec9dd
6 changed files with 188 additions and 9 deletions
|
|
@ -99,9 +99,9 @@ WRITER_OBJS = print.obj float_to_digits.obj float_to_string.obj \
|
|||
|
||||
READER_OBJS = read.obj parse_integer.obj parse_number.obj
|
||||
|
||||
STREAM_OBJS = stream.obj file.obj strm_os.obj \
|
||||
strm_clos.obj strm_string.obj strm_composite.obj \
|
||||
strm_common.obj strm_sequence.obj strm_eformat.obj
|
||||
STREAM_OBJS = stream.obj file.obj strm_os.obj strm_clos.obj \
|
||||
strm_string.obj strm_composite.obj strm_common.obj \
|
||||
strm_sequence.obj strm_eformat.obj strm_binary.obj
|
||||
|
||||
FFI_OBJS = ffi.obj libraries.obj backtrace.obj mmap.obj cdata.obj
|
||||
|
||||
|
|
|
|||
|
|
@ -72,9 +72,9 @@ WRITER_OBJS = print.o printer/float_to_digits.o printer/float_to_string.o
|
|||
|
||||
READER_OBJS = read.o reader/parse_integer.o reader/parse_number.o
|
||||
|
||||
STREAM_OBJS = stream.o file.o streams/strm_os.o \
|
||||
streams/strm_clos.o streams/strm_string.o streams/strm_composite.o \
|
||||
streams/strm_common.o streams/strm_sequence.o streams/strm_eformat.o
|
||||
STREAM_OBJS = stream.o file.o streams/strm_os.o streams/strm_clos.o \
|
||||
streams/strm_string.o streams/strm_composite.o streams/strm_common.o \
|
||||
streams/strm_sequence.o streams/strm_eformat.o streams/strm_binary.o
|
||||
|
||||
FFI_OBJS = ffi.o ffi/libraries.o ffi/backtrace.o ffi/mmap.o ffi/cdata.o
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,8 @@ ecl_alloc_stream(void)
|
|||
x->stream.buffer = NULL;
|
||||
x->stream.encoder = NULL;
|
||||
x->stream.decoder = NULL;
|
||||
x->stream.byte_encoder = NULL;
|
||||
x->stream.byte_decoder = NULL;
|
||||
x->stream.last_char = EOF;
|
||||
x->stream.byte_stack = ECL_NIL;
|
||||
x->stream.last_code[0] = x->stream.last_code[1] = EOF;
|
||||
|
|
|
|||
156
src/c/streams/strm_binary.d
Normal file
156
src/c/streams/strm_binary.d
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
/* -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- */
|
||||
/* vim: set filetype=c tabstop=2 shiftwidth=2 expandtab: */
|
||||
|
||||
/*
|
||||
* strm_binary.d - Byte encoding/decoding for streams
|
||||
*
|
||||
* Copyright (c) 1984 Taiichi Yuasa and Masami Hagiya
|
||||
* Copyright (c) 1990 Giuseppe Attardi
|
||||
* Copyright (c) 2001 Juan Jose Garcia Ripoll
|
||||
* Copyright (c) 2025 Daniel Kochmanski
|
||||
*
|
||||
* See file 'LICENSE' for the copyright details.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <ecl/ecl.h>
|
||||
#include <ecl/internal.h>
|
||||
|
||||
/* Binary operators */
|
||||
|
||||
cl_object
|
||||
ecl_binary_read_byte(cl_object strm)
|
||||
{
|
||||
cl_index (*read_byte8)(cl_object, unsigned char *, cl_index);
|
||||
unsigned char buf[ENCODING_BUFFER_MAX_SIZE];
|
||||
cl_object byte;
|
||||
cl_index nbytes;
|
||||
byte = strm->stream.last_byte;
|
||||
unlikely_if (byte != OBJNULL) {
|
||||
strm->stream.last_byte = OBJNULL;
|
||||
return byte;
|
||||
}
|
||||
read_byte8 = strm->stream.ops->read_byte8;
|
||||
nbytes = strm->stream.byte_size/8;
|
||||
if (read_byte8(strm, buf, nbytes) < nbytes)
|
||||
return OBJNULL;
|
||||
return strm->stream.byte_decoder(strm, buf);
|
||||
}
|
||||
|
||||
void
|
||||
ecl_binary_write_byte(cl_object strm, cl_object byte)
|
||||
{
|
||||
cl_index (*write_byte8)(cl_object strm, unsigned char *c, cl_index n);
|
||||
cl_index nbytes = strm->stream.byte_size/8;
|
||||
unsigned char buf[ENCODING_BUFFER_MAX_SIZE];
|
||||
write_byte8 = strm->stream.ops->write_byte8;
|
||||
strm->stream.byte_encoder(strm, buf, byte);
|
||||
write_byte8(strm, buf, nbytes);
|
||||
}
|
||||
|
||||
/*
|
||||
* 8-bit unsigned
|
||||
*/
|
||||
|
||||
cl_object
|
||||
ecl_binary_u8_decoder(cl_object strm, unsigned char *buf)
|
||||
{
|
||||
unsigned char c = buf[0];
|
||||
return ecl_make_fixnum(c);
|
||||
}
|
||||
|
||||
void
|
||||
ecl_binary_u8_encoder(cl_object strm, unsigned char *buf, cl_object byte)
|
||||
{
|
||||
unsigned char c = ecl_to_uint8_t(byte);
|
||||
buf[0] = c;
|
||||
}
|
||||
|
||||
/*
|
||||
* 8-bit signed
|
||||
*/
|
||||
cl_object
|
||||
ecl_binary_s8_decoder(cl_object strm, unsigned char *buf)
|
||||
{
|
||||
signed char c = (signed char)buf[0];
|
||||
return ecl_make_fixnum(c);
|
||||
}
|
||||
|
||||
void
|
||||
ecl_binary_s8_encoder(cl_object strm, unsigned char *buf, cl_object byte)
|
||||
{
|
||||
signed char c = ecl_to_int8_t(byte);
|
||||
buf[0] = (unsigned char)c;
|
||||
}
|
||||
|
||||
/*
|
||||
* Big Endian
|
||||
*/
|
||||
cl_object
|
||||
ecl_binary_be_decoder(cl_object strm, unsigned char *buf)
|
||||
{
|
||||
cl_index idx, ndx = strm->stream.byte_size/8;
|
||||
cl_object output = OBJNULL;
|
||||
cl_object offset = ecl_make_fixnum(8);
|
||||
unsigned char c;
|
||||
for (idx=0; idx<ndx; idx++) {
|
||||
c = buf[idx];
|
||||
if (output) {
|
||||
output = cl_logior(2, ecl_make_fixnum(c), cl_ash(output, offset));
|
||||
} else {
|
||||
output = (strm->stream.flags & ECL_STREAM_SIGNED_BYTES)
|
||||
? ecl_make_fixnum((signed char)c)
|
||||
: ecl_make_fixnum((unsigned char)c);
|
||||
}
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
void
|
||||
ecl_binary_be_encoder(cl_object strm, unsigned char *buf, cl_object byte)
|
||||
{
|
||||
cl_index idx, ndx = strm->stream.byte_size/8;
|
||||
cl_object offset = ecl_make_fixnum(-8);
|
||||
cl_object mask = ecl_make_fixnum(0xFF);
|
||||
for (idx=0; idx<ndx; idx++) {
|
||||
cl_object b = cl_logand(2, byte, mask);
|
||||
buf[ndx-idx-1] = (unsigned char)ecl_fixnum(b);
|
||||
byte = cl_ash(byte, offset);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Little Endian
|
||||
*/
|
||||
cl_object
|
||||
ecl_binary_le_decoder(cl_object strm, unsigned char *buf)
|
||||
{
|
||||
cl_index idx, ndx = strm->stream.byte_size/8;
|
||||
cl_object output = OBJNULL;
|
||||
cl_object offset = ecl_make_fixnum(8);
|
||||
unsigned char c;
|
||||
for (idx=0; idx<ndx; idx++) {
|
||||
c = buf[ndx-idx-1];
|
||||
if (output) {
|
||||
output = cl_logior(2, ecl_make_fixnum(c), cl_ash(output, offset));
|
||||
} else {
|
||||
output = (strm->stream.flags & ECL_STREAM_SIGNED_BYTES)
|
||||
? ecl_make_fixnum((signed char)c)
|
||||
: ecl_make_fixnum((unsigned char)c);
|
||||
}
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
void
|
||||
ecl_binary_le_encoder(cl_object strm, unsigned char *buf, cl_object byte)
|
||||
{
|
||||
cl_index idx, ndx = strm->stream.byte_size/8;
|
||||
cl_object offset = ecl_make_fixnum(-8);
|
||||
cl_object mask = ecl_make_fixnum(0xFF);
|
||||
for (idx=0; idx<ndx; idx++) {
|
||||
cl_object b = cl_logand(2, byte, mask);
|
||||
buf[idx] = (unsigned char)ecl_fixnum(b);
|
||||
byte = cl_ash(byte, offset);
|
||||
}
|
||||
}
|
||||
|
|
@ -463,6 +463,19 @@ cl_object ecl_generic_close(cl_object strm);
|
|||
cl_index ecl_generic_write_vector(cl_object strm, cl_object data, cl_index start, cl_index end);
|
||||
cl_index ecl_generic_read_vector(cl_object strm, cl_object data, cl_index start, cl_index end);
|
||||
|
||||
/* streams/strm_binary.d */
|
||||
cl_object ecl_binary_read_byte(cl_object strm);
|
||||
/* Encodes BYTE with the stream's byte_encoder and writes the resulting
 * octets to STRM. The stream is the first argument, the byte the second. */
void ecl_binary_write_byte(cl_object strm, cl_object byte);
|
||||
|
||||
cl_object ecl_binary_u8_decoder(cl_object strm, unsigned char *buf);
|
||||
void ecl_binary_u8_encoder(cl_object strm, unsigned char *buf, cl_object byte);
|
||||
cl_object ecl_binary_s8_decoder(cl_object strm, unsigned char *buf);
|
||||
void ecl_binary_s8_encoder(cl_object strm, unsigned char *buf, cl_object byte);
|
||||
cl_object ecl_binary_be_decoder(cl_object strm, unsigned char *buf);
|
||||
void ecl_binary_be_encoder(cl_object strm, unsigned char *buf, cl_object byte);
|
||||
cl_object ecl_binary_le_decoder(cl_object strm, unsigned char *buf);
|
||||
void ecl_binary_le_encoder(cl_object strm, unsigned char *buf, cl_object byte);
|
||||
|
||||
/* streams/strm_eformat.d */
|
||||
ecl_character ecl_eformat_read_char(cl_object strm);
|
||||
void ecl_eformat_unread_char(cl_object strm, ecl_character c);
|
||||
|
|
@ -482,9 +495,10 @@ write_char_increment_column(cl_object strm, ecl_character c)
|
|||
strm->stream.column++;
|
||||
}
|
||||
|
||||
/* Maximum number of bytes required to encode a character. This currently
|
||||
* corresponds to (4 + 4) for the UCS-4 encoding with 4 being the byte-order
|
||||
* mark, 4 for the character. */
|
||||
/* Maximum number of octets required to encode a char or a byte. This currently
|
||||
* corresponds to:
|
||||
* - (4 + 4) for the UCS-4 with 4 being the byte-order mark, 4 for the char
|
||||
* - (64/ 8) for the EXT:BYTE64 which is the biggest array integer type */
|
||||
#define ENCODING_BUFFER_MAX_SIZE 8
|
||||
|
||||
/* file.d */
|
||||
|
|
|
|||
|
|
@ -669,6 +669,11 @@ typedef ecl_character (*cl_eformat_decoder)(cl_object stream, unsigned char **bu
|
|||
number of bytes used */
|
||||
typedef int (*cl_eformat_encoder)(cl_object stream, unsigned char *buffer, ecl_character c);
|
||||
|
||||
/* Buffer is assumed to be big enough to store whole byte. The byte size is
|
||||
   stream->stream.byte_size. Decoder returns an object, encoder fills a buffer. */
|
||||
typedef cl_object (*cl_binary_decoder)(cl_object stream, unsigned char *buf);
|
||||
typedef void (*cl_binary_encoder)(cl_object stream, unsigned char *buf, cl_object byte);
|
||||
|
||||
#define ECL_ANSI_STREAM_P(o) \
|
||||
(ECL_IMMEDIATE(o) == 0 && ((o)->d.t == t_stream))
|
||||
#define ECL_ANSI_STREAM_TYPE_P(o,m) \
|
||||
|
|
@ -697,6 +702,8 @@ struct ecl_stream {
|
|||
cl_object format; /* external format */
|
||||
cl_eformat_encoder encoder;
|
||||
cl_eformat_decoder decoder;
|
||||
cl_binary_encoder byte_encoder;
|
||||
cl_binary_decoder byte_decoder;
|
||||
cl_object format_table;
|
||||
int flags; /* character table, flags, etc */
|
||||
ecl_character eof_char;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue