Support for large files

This commit is contained in:
Juan Jose Garcia Ripoll 2008-07-25 11:53:54 +02:00
parent 941d4ffca6
commit 46e3ea483e
6 changed files with 123 additions and 41 deletions

View file

@ -142,6 +142,8 @@ ECL 0.9k:
- New function EXT:MAKE-PIPE implements the equivalent of POSIX pipe() but
producing a two-way stream.
- Support for large files in systems that implement fseeko().
* CLOS:
- When caching generic function calls, ECL now uses a thread-local hash table

2
src/aclocal.m4 vendored
View file

@ -188,7 +188,7 @@ case "${host_os}" in
LDRPATH='-Wl,--rpath,~A'
clibs="-ldl"
# Maybe CFLAGS="-D_ISOC99_SOURCE ${CFLAGS}" ???
CFLAGS="-D_GNU_SOURCE ${CFLAGS}"
CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 ${CFLAGS}"
;;
gnu*)
thehost='gnu'

View file

@ -43,6 +43,16 @@
# include <sys/ioctl.h>
#endif
#ifndef HAVE_FSEEKO
#define ecl_off_t int
#define ecl_fseeko fseek
#defien ecl_ftello ftell
#else
#define ecl_off_t off_t
#define ecl_fseeko fseeko
#define ecl_ftello ftello
#endif
#define MAKE_BIT_MASK(n) ((1<<(n))-1)
static int flisten(FILE *fp);
@ -55,7 +65,7 @@ static int flisten(FILE *fp);
static void io_stream_begin_write(cl_object strm)
{
if (strm->stream.last_op > 0) {
fseek((FILE*)strm->stream.file, 0, SEEK_CUR);
ecl_fseeko((FILE*)strm->stream.file, 0, SEEK_CUR);
}
strm->stream.last_op = -1;
}
@ -73,6 +83,68 @@ static void io_stream_begin_read(cl_object strm)
strm->stream.last_op = +1;
}
static cl_object
ecl_off_t_to_integer(ecl_off_t offset)
{
cl_object output;
if (sizeof(ecl_off_t) == sizeof(cl_fixnum)) {
output = ecl_make_integer(offset);
} else if (offset <= MOST_POSITIVE_FIXNUM) {
output = MAKE_FIXNUM((cl_fixnum)offset);
} else {
cl_object y = big_register0_get();
#ifdef WITH_GMP
if (sizeof(y->big.big_limbs[0]) == sizeof(cl_index)) {
y->big.big_limbs[0] = (cl_index)offset;
offset >>= FIXNUM_BITS;
y->big.big_limbs[1] = offset;
y->big.big_size = offset? 2 : 1;
} else if (sizeof(y->big.big_limbs[0]) >= sizeof(ecl_off_t)) {
y->big.big_limbs[0] = offset;
y->big.big_size = 1;
}
#else
y->big.big_num = offset;
#endif
output = big_register_normalize(y);
}
return output;
}
static ecl_off_t
ecl_integer_to_off_t(cl_object offset)
{
ecl_off_t output = 0;
if (sizeof(ecl_off_t) == sizeof(cl_fixnum)) {
output = fixint(offset);
} else if (FIXNUMP(offset)) {
output = fixint(offset);
} else if (type_of(offset) == t_bignum) {
#ifdef WITH_GMP
if (sizeof(offset->big.big_limbs[0]) == sizeof(cl_index)) {
if (offset->big.big_size > 2) {
goto ERROR;
}
if (offset->big.big_size == 2) {
output = offset->big.big_limbs[1];
output <<= FIXNUM_BITS;
}
output += offset->big.big_limbs[0];
} else if (sizeof(offset->big.big_limbs[0]) >= sizeof(ecl_off_t)) {
if (offset->big.big_size > 1) {
goto ERROR;
}
output = offset->big.big_limbs[0];
}
#else
output = offset->big.big_num;
#endif
} else {
ERROR: FEerror("Not a valid file offset: ~S", 1, offset);
}
return output;
}
/*----------------------------------------------------------------------
* ecl_input_stream_p(strm) answers
* if stream strm is an input stream or not.
@ -383,7 +455,7 @@ ecl_open_stream(cl_object fn, enum ecl_smmode smm, cl_object if_exists,
FEerror("~S has an invalid binary header ~S",
2, fn, MAKE_FIXNUM(binary_header));
}
fseek(fp, 0, SEEK_SET);
ecl_fseeko(fp, 0, SEEK_SET);
}
} else if (smm == smm_output || smm == smm_io) {
if (if_exists == @':new_version' && if_does_not_exist == @':create')
@ -399,7 +471,7 @@ ecl_open_stream(cl_object fn, enum ecl_smmode smm, cl_object if_exists,
FEerror("~S has an invalid binary header ~S",
2, fn, MAKE_FIXNUM(binary_header));
if (binary_header != 0 && if_exists == @':append' &&
fseek(fp, -1, SEEK_END) == 0) {
ecl_fseeko(fp, -1, SEEK_END) == 0) {
/* Read the last byte */
bit_buffer = getc(fp) & 0xFF;
bits_left = binary_header;
@ -436,7 +508,7 @@ ecl_open_stream(cl_object fn, enum ecl_smmode smm, cl_object if_exists,
FEcannot_open(fn);
}
if (if_exists == @':append') {
fseek(fp, 0, SEEK_END);
ecl_fseeko(fp, 0, SEEK_END);
appending = TRUE;
}
} else if (Null(if_exists)) {
@ -497,9 +569,9 @@ ecl_open_stream(cl_object fn, enum ecl_smmode smm, cl_object if_exists,
/* Set file pointer to the correct position */
if (appending) {
if (bits_left != 0)
fseek(fp, -1, SEEK_END);
ecl_fseeko(fp, -1, SEEK_END);
} else {
fseek(fp, (use_header_p ? 1 : 0), SEEK_SET);
ecl_fseeko(fp, (use_header_p ? 1 : 0), SEEK_SET);
}
}
}
@ -542,7 +614,7 @@ static void flush_output_stream_binary(cl_object strm);
ecl_force_output(strm);
if (!strm->stream.char_stream_p && strm->stream.header != 0xFF) {
/* write header */
if (fseek(fp, 0, SEEK_SET) != 0)
if (ecl_fseeko(fp, 0, SEEK_SET) != 0)
io_error(strm);
ecl_write_byte8(strm->stream.header, strm);
}
@ -734,12 +806,12 @@ BEGIN:
if (strm->stream.buffer_state == 1) {
/* buffer is prepared for reading: re-read (8-nb) bits and throw the rest */
int c0;
fseek((FILE*)strm->stream.file, -1, SEEK_CUR);
ecl_fseeko((FILE*)strm->stream.file, -1, SEEK_CUR);
c0 = ecl_read_byte8(strm);
if (c0 == EOF)
/* this should not happen !!! */
io_error(strm);
fseek((FILE*)strm->stream.file, -1, SEEK_CUR);
ecl_fseeko((FILE*)strm->stream.file, -1, SEEK_CUR);
b = (unsigned char)(c0 & MAKE_BIT_MASK(8-nb));
nb = (8-nb);
}
@ -858,10 +930,10 @@ flush_output_stream_binary(cl_object strm)
FILE *fp = (FILE*)strm->stream.file;
/* do we need to merge with existing byte? */
long current_offset = ftell(fp), diff_offset;
if (fseek(fp, 0, SEEK_END) != 0)
ecl_off_t current_offset = ecl_ftello(fp), diff_offset;
if (ecl_fseeko(fp, 0, SEEK_END) != 0)
io_error(strm);
switch ((diff_offset = ftell(fp)-current_offset)) {
switch ((diff_offset = ecl_ftello(fp)-current_offset)) {
case 0: break;
case 1:
/* (EOF-1): merge only if less bits left than header tells us */
@ -871,7 +943,7 @@ flush_output_stream_binary(cl_object strm)
do_merging = (diff_offset > 1);
break;
}
if (fseek(fp, current_offset, SEEK_SET) != 0)
if (ecl_fseeko(fp, current_offset, SEEK_SET) != 0)
io_error(strm);
/* do merging, if required */
@ -882,21 +954,21 @@ flush_output_stream_binary(cl_object strm)
if (c != EOF)
b |= (unsigned char)(c & ~MAKE_BIT_MASK(nb));
/* rewind stream */
if (fseek(fp, -1, SEEK_CUR) != 0)
if (ecl_fseeko(fp, -1, SEEK_CUR) != 0)
io_error(strm);
} else {
/* write-only stream: need to reopen the file for reading *
* the byte to merge, then reopen it back for writing */
cl_object fn = si_coerce_to_filename(strm->stream.object1);
if (freopen(fn->base_string.self, OPEN_R, fp) == NULL ||
fseek(fp, current_offset, SEEK_SET) != 0)
ecl_fseeko(fp, current_offset, SEEK_SET) != 0)
io_error(strm);
/* cannot use ecl_read_byte8 here, because strm hasn't the right mode */
b |= (unsigned char)(getc(fp) & ~MAKE_BIT_MASK(nb));
/* need special trick to re-open the file for writing, avoiding truncation */
fclose(fp);
strm->stream.file = fdopen(open(fn->base_string.self, O_WRONLY), OPEN_W);
if (strm->stream.file == NULL || fseek(fp, current_offset, SEEK_SET) != 0)
if (strm->stream.file == NULL || ecl_fseeko(fp, current_offset, SEEK_SET) != 0)
io_error(strm);
}
} else {
@ -906,7 +978,7 @@ flush_output_stream_binary(cl_object strm)
/* flush byte w/o changing file pointer */
ecl_write_byte8(b, strm);
fseek(fp, -1, SEEK_CUR);
ecl_fseeko(fp, -1, SEEK_CUR);
}
}
@ -1805,7 +1877,7 @@ BEGIN:
#if 0
if (fp == NULL)
wrong_file_handler(strm);
if (fseek(fp, 0L, 2) != 0)
if (ecl_fseeko(fp, 0L, 2) != 0)
io_error(strm);
#endif
break;
@ -1935,11 +2007,11 @@ flisten(FILE *fp)
It will fail on noninteractive streams. */
{
/* regular file */
long old_pos = ftell(fp), end_pos;
if (fseek(fp, 0, SEEK_END) != 0)
ecl_off_t old_pos = ecl_ftello(fp), end_pos;
if (ecl_fseeko(fp, 0, SEEK_END) != 0)
FElibc_error("fseek() returned an error value", 0);
end_pos = ftell(fp);
if (fseek(fp, old_pos, SEEK_SET) != 0)
end_pos = ecl_ftello(fp);
if (ecl_fseeko(fp, old_pos, SEEK_SET) != 0)
FElibc_error("fseek() returned an error value", 0);
return (end_pos > old_pos ? ECL_LISTEN_AVAILABLE : ECL_LISTEN_EOF);
}
@ -2057,14 +2129,18 @@ BEGIN:
ecl_force_output(strm);
case smm_input: {
/* FIXME! This does not handle large file sizes */
cl_fixnum small_offset;
ecl_off_t offset;
FILE *fp = (FILE*)strm->stream.file;
if (fp == NULL)
wrong_file_handler(strm);
small_offset = ftell(fp);
if (small_offset < 0)
offset = ecl_ftello(fp);
if (offset < 0)
io_error(strm);
output = ecl_make_integer(small_offset);
if (sizeof(ecl_off_t) == sizeof(long)) {
output = ecl_make_integer(offset);
} else {
output = ecl_off_t_to_integer(offset);
}
break;
}
case smm_string_output:
@ -2126,7 +2202,8 @@ BEGIN:
cl_object
ecl_file_position_set(cl_object strm, cl_object large_disp)
{
cl_index disp, extra = 0;
ecl_off_t disp;
int extra = 0;
BEGIN:
#ifdef ECL_CLOS_STREAMS
if (ECL_INSTANCEP(strm))
@ -2154,10 +2231,10 @@ BEGIN:
/* reset internal buffer: should be set again if extra != 0 */
strm->stream.bit_buffer = strm->stream.bits_left = strm->stream.buffer_state = 0;
}
disp = fixnnint(large_disp);
disp = ecl_integer_to_off_t(large_disp);
if (fp == NULL)
wrong_file_handler(strm);
if (fseek(fp, disp, 0) != 0)
if (ecl_fseeko(fp, disp, 0) != 0)
return Cnil;
if (extra != 0) {
if (ecl_input_stream_p(strm)) {

13
src/configure vendored
View file

@ -1338,9 +1338,9 @@ Optional Features:
--enable-hierarchical-packages
hierarchical package names (default=YES)
--enable-asmapply enable optimizations written in assembler
(default=NO)
(default=YES)
--enable-smallcons use small (2 words) cons types. Requires
Boehm-Weiser gc. (default=NO)
Boehm-Weiser gc. (default=YES)
--enable-gengc use generational garbage collection. Requires
Boehm-Weiser gc. (no|yes, default=NO)
@ -2110,7 +2110,7 @@ fi
if test "${enable_asmapply+set}" = set; then
enableval=$enable_asmapply; enable_asmapply=${enableval}
else
enable_asmapply=no
enable_asmapply=yes
fi
@ -2118,7 +2118,7 @@ fi
if test "${enable_smallcons+set}" = set; then
enableval=$enable_smallcons; enable_smallcons=${enableval}
else
enable_smallcons=no
enable_smallcons=yes
fi
@ -4128,7 +4128,7 @@ case "${host_os}" in
LDRPATH='-Wl,--rpath,~A'
clibs="-ldl"
# Maybe CFLAGS="-D_ISOC99_SOURCE ${CFLAGS}" ???
CFLAGS="-D_GNU_SOURCE ${CFLAGS}"
CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 ${CFLAGS}"
;;
gnu*)
thehost='gnu'
@ -10752,7 +10752,8 @@ done
for ac_func in sched_yield uname
for ac_func in sched_yield uname fseeko
do
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ echo "$as_me:$LINENO: checking for $ac_func" >&5

View file

@ -191,14 +191,14 @@ AC_ARG_ENABLE(hpack,
AC_ARG_ENABLE(asmapply,
AS_HELP_STRING( [--enable-asmapply],
[enable optimizations written in assembler]
[(default=NO)]),
[enable_asmapply=${enableval}],[enable_asmapply=no])
[(default=YES)]),
[enable_asmapply=${enableval}],[enable_asmapply=yes])
AC_ARG_ENABLE(smallcons,
AS_HELP_STRING( [--enable-smallcons],
[use small (2 words) cons types. Requires Boehm-Weiser gc.]
[(default=NO)]),
[enable_smallcons=${enableval}],[enable_smallcons=no])
[(default=YES)]),
[enable_smallcons=${enableval}],[enable_smallcons=yes])
AC_ARG_ENABLE(gengc,
AS_HELP_STRING( [--enable-gengc],
@ -471,7 +471,7 @@ AC_CHECK_FUNCS( [nanosleep alarm times isnanf select setenv putenv] \
AC_CHECK_FUNCS( [expf logf sqrtf cosf sinf tanf sinhf coshf tanhf] \
[floorf ceilf fabsf frexpf ldexpf log1p log1pf log1pl])
AC_CHECK_FUNCS( [sched_yield uname] )
AC_CHECK_FUNCS( [sched_yield uname fseeko] )
dnl =====================================================================
dnl Checks for system services

View file

@ -264,6 +264,8 @@ typedef unsigned @CL_FIXNUM_TYPE@ cl_hashkey;
#undef HAVE_FENV_H
/* can activate individual traps in floating point environment */
#undef HAVE_FEENABLEEXCEPT
/* has support for large files */
#undef HAVE_FSEEKO
/* the tzset() function gets the current time zone */
#undef HAVE_TZSET
/* several floating point functions (ISO C99) */