streams: add binary encoders and decoders to the mix

This will allow us to transcode characters to bytes and vice versa. This is
necessary to implement conductive UNREAD-CHAR and UNREAD-BYTE, and will also
allow us to add low-level parsers for binary objects in the future.
This commit is contained in:
Daniel Kochmański 2025-07-25 12:10:07 +02:00
parent 5fe96b8339
commit ba422ec9dd
6 changed files with 188 additions and 9 deletions

View file

@ -99,9 +99,9 @@ WRITER_OBJS = print.obj float_to_digits.obj float_to_string.obj \
READER_OBJS = read.obj parse_integer.obj parse_number.obj
STREAM_OBJS = stream.obj file.obj strm_os.obj \
strm_clos.obj strm_string.obj strm_composite.obj \
strm_common.obj strm_sequence.obj strm_eformat.obj
STREAM_OBJS = stream.obj file.obj strm_os.obj strm_clos.obj \
strm_string.obj strm_composite.obj strm_common.obj \
strm_sequence.obj strm_eformat.obj strm_binary.obj
FFI_OBJS = ffi.obj libraries.obj backtrace.obj mmap.obj cdata.obj

View file

@ -72,9 +72,9 @@ WRITER_OBJS = print.o printer/float_to_digits.o printer/float_to_string.o
READER_OBJS = read.o reader/parse_integer.o reader/parse_number.o
STREAM_OBJS = stream.o file.o streams/strm_os.o \
streams/strm_clos.o streams/strm_string.o streams/strm_composite.o \
streams/strm_common.o streams/strm_sequence.o streams/strm_eformat.o
STREAM_OBJS = stream.o file.o streams/strm_os.o streams/strm_clos.o \
streams/strm_string.o streams/strm_composite.o streams/strm_common.o \
streams/strm_sequence.o streams/strm_eformat.o streams/strm_binary.o
FFI_OBJS = ffi.o ffi/libraries.o ffi/backtrace.o ffi/mmap.o ffi/cdata.o

View file

@ -40,6 +40,8 @@ ecl_alloc_stream(void)
x->stream.buffer = NULL;
x->stream.encoder = NULL;
x->stream.decoder = NULL;
x->stream.byte_encoder = NULL;
x->stream.byte_decoder = NULL;
x->stream.last_char = EOF;
x->stream.byte_stack = ECL_NIL;
x->stream.last_code[0] = x->stream.last_code[1] = EOF;

156
src/c/streams/strm_binary.d Normal file
View file

@ -0,0 +1,156 @@
/* -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- */
/* vim: set filetype=c tabstop=2 shiftwidth=2 expandtab: */
/*
* strm_binary.d - Byte encoding/decoding for streams
*
* Copyright (c) 1984 Taiichi Yuasa and Masami Hagiya
* Copyright (c) 1990 Giuseppe Attardi
* Copyright (c) 2001 Juan Jose Garcia Ripoll
* Copyright (c) 2025 Daniel Kochmanski
*
* See file 'LICENSE' for the copyright details.
*
*/
#include <ecl/ecl.h>
#include <ecl/internal.h>
/* Binary operators */
/*
 * Read one element (a "byte" in the Lisp sense) from STRM.
 *
 * If strm->stream.last_byte holds an object, that object is returned and the
 * slot is reset to OBJNULL. Otherwise byte_size/8 octets are requested from
 * the stream's read_byte8 operation and handed to strm->stream.byte_decoder.
 * Returns OBJNULL when read_byte8 delivers fewer octets than requested.
 *
 * NOTE(review): the scratch buffer holds ENCODING_BUFFER_MAX_SIZE octets; this
 * assumes byte_size/8 never exceeds that value -- TODO confirm against the
 * code that sets byte_size.
 */
cl_object
ecl_binary_read_byte(cl_object strm)
{
  unsigned char octets[ENCODING_BUFFER_MAX_SIZE];
  cl_object pending = strm->stream.last_byte;
  cl_index wanted, got;
  unlikely_if (pending != OBJNULL) {
    /* Hand back the stored byte exactly once. */
    strm->stream.last_byte = OBJNULL;
    return pending;
  }
  wanted = strm->stream.byte_size/8;
  got = strm->stream.ops->read_byte8(strm, octets, wanted);
  if (got >= wanted) {
    return strm->stream.byte_decoder(strm, octets);
  }
  /* Short read: no complete element is available. */
  return OBJNULL;
}
/*
 * Write one element BYTE to STRM.
 *
 * The object is serialized into a local buffer by strm->stream.byte_encoder
 * and byte_size/8 octets of that buffer are passed to the stream's
 * write_byte8 operation. The value returned by write_byte8 is not inspected.
 *
 * NOTE(review): the stream is the FIRST argument here; confirm that the header
 * prototype and all call sites agree on this argument order.
 * NOTE(review): assumes byte_size/8 <= ENCODING_BUFFER_MAX_SIZE -- TODO
 * confirm against the code that sets byte_size.
 */
void
ecl_binary_write_byte(cl_object strm, cl_object byte)
{
  unsigned char octets[ENCODING_BUFFER_MAX_SIZE];
  const cl_index count = strm->stream.byte_size/8;
  /* Fetch the writer before encoding, as the encoder is an opaque callback. */
  cl_index (*emit)(cl_object, unsigned char *, cl_index) =
    strm->stream.ops->write_byte8;
  strm->stream.byte_encoder(strm, octets, byte);
  emit(strm, octets, count);
}
/*
* 8-bit unsigned
*/
/* Decode the first octet of BUF as an unsigned value. STRM is not consulted. */
cl_object
ecl_binary_u8_decoder(cl_object strm, unsigned char *buf)
{
  return ecl_make_fixnum(buf[0]);
}
/* Store BYTE, converted with ecl_to_uint8_t, into the first octet of BUF.
 * STRM is not consulted. */
void
ecl_binary_u8_encoder(cl_object strm, unsigned char *buf, cl_object byte)
{
  buf[0] = ecl_to_uint8_t(byte);
}
/*
* 8-bit signed
*/
/* Decode the first octet of BUF, reinterpreted as a signed char. STRM is not
 * consulted. */
cl_object
ecl_binary_s8_decoder(cl_object strm, unsigned char *buf)
{
  return ecl_make_fixnum((signed char)buf[0]);
}
/* Store BYTE, converted with ecl_to_int8_t, into the first octet of BUF as its
 * unsigned char representation. STRM is not consulted. */
void
ecl_binary_s8_encoder(cl_object strm, unsigned char *buf, cl_object byte)
{
  const signed char value = ecl_to_int8_t(byte);
  *buf = (unsigned char)value;
}
/*
* Big Endian
*/
/*
 * Assemble an integer from byte_size/8 octets of BUF, most significant octet
 * first.
 *
 * The leading octet is taken as a signed char when the stream has the
 * ECL_STREAM_SIGNED_BYTES flag set, and as an unsigned char otherwise; every
 * following octet is merged in after shifting the accumulator left by 8 bits.
 * Returns OBJNULL when byte_size/8 is zero.
 */
cl_object
ecl_binary_be_decoder(cl_object strm, unsigned char *buf)
{
  const cl_index nbytes = strm->stream.byte_size/8;
  const cl_object shift = ecl_make_fixnum(8);
  cl_object value;
  cl_index pos;
  if (nbytes == 0) {
    return OBJNULL;
  }
  /* Only the leading octet carries the sign. */
  if (strm->stream.flags & ECL_STREAM_SIGNED_BYTES) {
    value = ecl_make_fixnum((signed char)buf[0]);
  } else {
    value = ecl_make_fixnum((unsigned char)buf[0]);
  }
  for (pos = 1; pos < nbytes; pos++) {
    const unsigned char octet = buf[pos];
    value = cl_logior(2, ecl_make_fixnum(octet), cl_ash(value, shift));
  }
  return value;
}
/*
 * Serialize BYTE into byte_size/8 octets of BUF, most significant octet first.
 *
 * The low 8 bits of the value are peeled off repeatedly and stored from the
 * end of the buffer towards its start; the value is shifted right by 8 bits
 * between steps. No range check is performed on BYTE here.
 */
void
ecl_binary_be_encoder(cl_object strm, unsigned char *buf, cl_object byte)
{
  const cl_index nbytes = strm->stream.byte_size/8;
  const cl_object shift = ecl_make_fixnum(-8);
  const cl_object low_bits = ecl_make_fixnum(0xFF);
  cl_index pos;
  for (pos = nbytes; pos > 0; pos--) {
    cl_object octet = cl_logand(2, byte, low_bits);
    buf[pos-1] = (unsigned char)ecl_fixnum(octet);
    byte = cl_ash(byte, shift);
  }
}
/*
* Little Endian
*/
/*
 * Assemble an integer from byte_size/8 octets of BUF, least significant octet
 * first.
 *
 * The last octet of the element is the most significant one: it is taken as a
 * signed char when the stream has the ECL_STREAM_SIGNED_BYTES flag set, and as
 * an unsigned char otherwise. The remaining octets are merged in from the end
 * of the buffer towards its start, shifting the accumulator left by 8 bits
 * each time. Returns OBJNULL when byte_size/8 is zero.
 */
cl_object
ecl_binary_le_decoder(cl_object strm, unsigned char *buf)
{
  const cl_index nbytes = strm->stream.byte_size/8;
  const cl_object shift = ecl_make_fixnum(8);
  cl_object value;
  cl_index pos;
  if (nbytes == 0) {
    return OBJNULL;
  }
  /* Only the most significant (last) octet carries the sign. */
  if (strm->stream.flags & ECL_STREAM_SIGNED_BYTES) {
    value = ecl_make_fixnum((signed char)buf[nbytes-1]);
  } else {
    value = ecl_make_fixnum((unsigned char)buf[nbytes-1]);
  }
  for (pos = nbytes-1; pos > 0; pos--) {
    const unsigned char octet = buf[pos-1];
    value = cl_logior(2, ecl_make_fixnum(octet), cl_ash(value, shift));
  }
  return value;
}
/*
 * Serialize BYTE into byte_size/8 octets of BUF, least significant octet
 * first.
 *
 * The low 8 bits of the value are stored at successive buffer positions and
 * the value is shifted right by 8 bits between steps. No range check is
 * performed on BYTE here.
 */
void
ecl_binary_le_encoder(cl_object strm, unsigned char *buf, cl_object byte)
{
  const cl_object shift = ecl_make_fixnum(-8);
  const cl_object low_bits = ecl_make_fixnum(0xFF);
  unsigned char *out = buf;
  unsigned char *const end = buf + strm->stream.byte_size/8;
  while (out < end) {
    cl_object octet = cl_logand(2, byte, low_bits);
    *out++ = (unsigned char)ecl_fixnum(octet);
    byte = cl_ash(byte, shift);
  }
}

View file

@ -463,6 +463,19 @@ cl_object ecl_generic_close(cl_object strm);
cl_index ecl_generic_write_vector(cl_object strm, cl_object data, cl_index start, cl_index end);
cl_index ecl_generic_read_vector(cl_object strm, cl_object data, cl_index start, cl_index end);
/* streams/strm_binary.d */
/* Element-wise binary I/O. ecl_binary_read_byte returns OBJNULL when a whole
 * element could not be read. ecl_binary_write_byte is defined with the stream
 * as its first argument and the byte as its second; the parameter names below
 * mirror that definition.
 * NOTE(review): verify that call sites pass (stream, byte) in this order. */
cl_object ecl_binary_read_byte(cl_object strm);
void ecl_binary_write_byte(cl_object strm, cl_object byte);
/* Codecs for single-octet elements (unsigned and signed). */
cl_object ecl_binary_u8_decoder(cl_object strm, unsigned char *buf);
void ecl_binary_u8_encoder(cl_object strm, unsigned char *buf, cl_object byte);
cl_object ecl_binary_s8_decoder(cl_object strm, unsigned char *buf);
void ecl_binary_s8_encoder(cl_object strm, unsigned char *buf, cl_object byte);
/* Codecs for elements of byte_size/8 octets, big and little endian. */
cl_object ecl_binary_be_decoder(cl_object strm, unsigned char *buf);
void ecl_binary_be_encoder(cl_object strm, unsigned char *buf, cl_object byte);
cl_object ecl_binary_le_decoder(cl_object strm, unsigned char *buf);
void ecl_binary_le_encoder(cl_object strm, unsigned char *buf, cl_object byte);
/* streams/strm_eformat.d */
ecl_character ecl_eformat_read_char(cl_object strm);
void ecl_eformat_unread_char(cl_object strm, ecl_character c);
@ -482,9 +495,10 @@ write_char_increment_column(cl_object strm, ecl_character c)
strm->stream.column++;
}
/* Maximum number of bytes required to encode a character. This currently
* corresponds to (4 + 4) for the UCS-4 encoding with 4 being the byte-order
* mark, 4 for the character. */
/* Maximum number of octets required to encode a char or a byte. This currently
 * corresponds to:
 * - (4 + 4) for UCS-4, with 4 being the byte-order mark and 4 for the char
 * - (64 / 8) for EXT:BYTE64, which is the biggest array integer type
 * ecl_binary_read_byte and ecl_binary_write_byte size their local scratch
 * buffers with this value. */
#define ENCODING_BUFFER_MAX_SIZE 8
/* file.d */

View file

@ -669,6 +669,11 @@ typedef ecl_character (*cl_eformat_decoder)(cl_object stream, unsigned char **bu
number of bytes used */
typedef int (*cl_eformat_encoder)(cl_object stream, unsigned char *buffer, ecl_character c);
/* Binary (byte) codecs. The buffer is assumed to be big enough to store a
   whole byte. The byte size is stream->stream.byte_size; the codecs process
   byte_size/8 octets. The decoder returns an object built from the buffer,
   the encoder fills the buffer from an object. */
typedef cl_object (*cl_binary_decoder)(cl_object stream, unsigned char *buf);
typedef void (*cl_binary_encoder)(cl_object stream, unsigned char *buf, cl_object byte);
#define ECL_ANSI_STREAM_P(o) \
(ECL_IMMEDIATE(o) == 0 && ((o)->d.t == t_stream))
#define ECL_ANSI_STREAM_TYPE_P(o,m) \
@ -697,6 +702,8 @@ struct ecl_stream {
cl_object format; /* external format */
cl_eformat_encoder encoder;
cl_eformat_decoder decoder;
cl_binary_encoder byte_encoder;
cl_binary_decoder byte_decoder;
cl_object format_table;
int flags; /* character table, flags, etc */
ecl_character eof_char;