diff --git a/src/c/Makefile.in b/src/c/Makefile.in index 86e48dfa1..48c49017d 100644 --- a/src/c/Makefile.in +++ b/src/c/Makefile.in @@ -50,7 +50,7 @@ OBJS = main.o symbol.o package.o list.o\ time.o unixint.o\ mapfun.o multival.o hash.o format.o pathname.o\ structure.o load.o unixfsys.o unixsys.o \ - serialize.o ffi.o @EXTRA_OBJS@ + serialize.o ffi.o sse2.o @EXTRA_OBJS@ .SUFFIXES: .c .o .d .s .PHONY: all diff --git a/src/c/alloc.d b/src/c/alloc.d index d9d646d82..06fbea24c 100644 --- a/src/c/alloc.d +++ b/src/c/alloc.d @@ -276,6 +276,9 @@ ONCE_MORE: obj->ratio.num = OBJNULL; obj->ratio.den = OBJNULL; break; +#ifdef ECL_SSE2 + case t_sse_pack: +#endif case t_singlefloat: case t_doublefloat: #ifdef ECL_LONG_FLOAT diff --git a/src/c/alloc_2.d b/src/c/alloc_2.d index 02b666dc3..1c5224210 100755 --- a/src/c/alloc_2.d +++ b/src/c/alloc_2.d @@ -495,6 +495,9 @@ ecl_alloc_object(cl_type t) return MAKE_FIXNUM(0); /* Immediate fixnum */ case t_character: return CODE_CHAR(' '); /* Immediate character */ +#ifdef ECL_SSE2 + case t_sse_pack: +#endif #ifdef ECL_LONG_FLOAT case t_longfloat: #endif @@ -821,6 +824,9 @@ init_alloc(void) init_tm(t_foreign, "FOREIGN", sizeof(struct ecl_foreign), 2); init_tm(t_frame, "STACK-FRAME", sizeof(struct ecl_stack_frame), 2); init_tm(t_weak_pointer, "WEAK-POINTER", sizeof(struct ecl_weak_pointer), 0); +#ifdef ECL_SSE2 + init_tm(t_sse_pack, "SSE-PACK", sizeof(struct ecl_sse_pack), 0); +#endif #ifdef GBC_BOEHM_PRECISE type_info[t_list].descriptor = to_bitmap(&c, &(c.car)) | @@ -968,6 +974,9 @@ init_alloc(void) to_bitmap(&o, &(o.frame.base)) | to_bitmap(&o, &(o.frame.env)); type_info[t_weak_pointer].descriptor = 0; +#ifdef ECL_SSE2 + type_info[t_sse_pack].descriptor = 0; +#endif for (i = 0; i < t_end; i++) { GC_word descriptor = type_info[i].descriptor; int bits = type_info[i].size / sizeof(GC_word); diff --git a/src/c/gbc-new.d b/src/c/gbc-new.d index b9bcc70e9..c8fa49612 100644 --- a/src/c/gbc-new.d +++ b/src/c/gbc-new.d @@ -153,6 +153,9 @@ BEGIN: mark_next(x->ratio.den); break; +#ifdef ECL_SSE2 + case t_sse_pack: +#endif case t_singlefloat: case t_doublefloat: break; diff --git a/src/c/gbc.d b/src/c/gbc.d index 1ca896df0..c4d4801f0 100644 --- a/src/c/gbc.d +++ b/src/c/gbc.d @@ -153,6 +153,9 @@ BEGIN: mark_next(x->ratio.den); break; +#ifdef ECL_SSE2 + case t_sse_pack: +#endif case t_singlefloat: case t_doublefloat: #ifdef ECL_LONG_FLOAT diff --git a/src/c/hash.d b/src/c/hash.d index 107b74e1d..34b857799 100644 --- a/src/c/hash.d +++ b/src/c/hash.d @@ -72,6 +72,10 @@ _hash_eql(cl_hashkey h, cl_object x) return _hash_eql(h, x->complex.imag); case t_character: return hash_word(h, CHAR_CODE(x)); +#ifdef ECL_SSE2 + case t_sse_pack: + return hash_string(h, x->sse.data.b8, 16); +#endif default: return hash_word(h, ((cl_hashkey)x >> 2)); } diff --git a/src/c/instance.d b/src/c/instance.d index f4ca7fa0a..0f7d6ddc7 100644 --- a/src/c/instance.d +++ b/src/c/instance.d @@ -265,6 +265,9 @@ enum ecl_built_in_classes { #ifdef ECL_SEMAPHORES , ECL_BUILTIN_SEMAPHORE #endif +#ifdef ECL_SSE2 + , ECL_BUILTIN_SSE_PACK +#endif }; cl_object @@ -363,6 +366,10 @@ cl_class_of(cl_object x) index = ECL_BUILTIN_FRAME; break; case t_weak_pointer: index = ECL_BUILTIN_WEAK_POINTER; break; +#ifdef ECL_SSE2 + case t_sse_pack: + index = ECL_BUILTIN_SSE_PACK; break; +#endif default: ecl_internal_error("not a lisp data object"); } diff --git a/src/c/main.d b/src/c/main.d index 758ee429b..12144c92c 100644 --- a/src/c/main.d +++ b/src/c/main.d @@ -181,6 +181,9 @@ static const char *feature_names[] = { #endif #ifdef __cplusplus "C++", +#endif +#ifdef ECL_SSE2 + "SSE2", #endif 0 }; diff --git a/src/c/predicate.d b/src/c/predicate.d index 268a4883a..b5c0bdbd7 100644 --- a/src/c/predicate.d +++ b/src/c/predicate.d @@ -292,6 +292,13 @@ ecl_eql(cl_object x, cl_object y) case t_complex: return (ecl_eql(x->complex.real, y->complex.real) && ecl_eql(x->complex.imag, y->complex.imag)); +#ifdef ECL_SSE2 + case t_sse_pack: + return (x->sse.elttype == y->sse.elttype || + (x->sse.elttype != aet_sf && x->sse.elttype != aet_df && + y->sse.elttype != aet_sf && y->sse.elttype != aet_df)) + && !memcmp(x->sse.data.b8, y->sse.data.b8, 16); +#endif default: return FALSE; } diff --git a/src/c/print.d b/src/c/print.d index 524d333a6..83e3e659a 100644 --- a/src/c/print.d +++ b/src/c/print.d @@ -1035,6 +1035,66 @@ write_array(bool vector, cl_object x, cl_object stream) } } +#ifdef ECL_SSE2 +static int +is_all_FF(void *ptr, int size) { + int i; + for (i = 0; i < size; i++) + if (((unsigned char*)ptr)[i] != 0xFF) + return 0; + return 1; +} + +static void +write_sse_float(float v, cl_object stream) +{ + if (is_all_FF(&v, sizeof(float))) + write_str(" TRUE", stream); + else { + char buf[60]; + sprintf(buf, " %g", v); + write_str(buf, stream); + } +} + +static void +write_sse_double(double v, cl_object stream) +{ + if (is_all_FF(&v, sizeof(double))) + write_str(" TRUE", stream); + else { + char buf[60]; + sprintf(buf, " %lg", v); + write_str(buf, stream); + } +} + +static void +write_sse_pack(cl_object x, cl_object stream) +{ + int i; + + switch (x->sse.elttype) { + case aet_sf: + for (i = 0; i < 4; i++) + write_sse_float(x->sse.data.sf[i], stream); + break; + case aet_df: + write_sse_double(x->sse.data.df[0], stream); + write_sse_double(x->sse.data.df[1], stream); + break; + default: + for (i = 0; i < 16; i++) { + char buf[10]; + int pad = 1 + (i%4 == 0); + sprintf(buf, "%*c%02x", pad, ' ', x->sse.data.b8[i]); + write_str(buf, stream); + } + break; + } +} +#endif + cl_object si_write_ugly_object(cl_object x, cl_object stream) { @@ -1619,6 +1679,14 @@ si_write_ugly_object(cl_object x, cl_object stream) write_addr(x, stream); write_ch('>', stream); break; +#endif +#ifdef ECL_SSE2 + case t_sse_pack: + if (ecl_print_readably()) FEprint_not_readable(x); + write_str("#', stream); + break; #endif default: if (ecl_print_readably()) FEprint_not_readable(x); diff --git a/src/c/serialize.d b/src/c/serialize.d index 3e1b35e50..6f73150a8 100644 --- a/src/c/serialize.d +++ b/src/c/serialize.d @@ -83,6 +83,9 @@ static cl_index object_size[] = { ROUNDED_SIZE(ecl_foreign), /* t_foreign */ ROUNDED_SIZE(ecl_frame), /* t_frame */ ROUNDED_SIZE(ecl_weak_pointer) /* t_weak_pointer */ +#ifdef ECL_SSE2 + , ROUNDED_SIZE(ecl_sse_pack) /* t_sse_pack */ +#endif }; typedef struct pool { diff --git a/src/c/sse2.d b/src/c/sse2.d new file mode 100644 index 000000000..ac253200b --- /dev/null +++ b/src/c/sse2.d @@ -0,0 +1,149 @@ +/* -*- mode: c; c-basic-offset: 8 -*- */ +/* + sse2.c -- SSE2 vector type support +*/ +/* + Copyright (c) 1984, Taiichi Yuasa and Masami Hagiya. + Copyright (c) 1990, Giuseppe Attardi. + Copyright (c) 2001, Juan Jose Garcia Ripoll. + + ECL is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + See file '../Copyright' for full details. +*/ + +#include +#include +#include +#define ECL_DEFINE_AET_SIZE +#include + +#ifdef ECL_SSE2 + +/* Predicates */ + +cl_object +si_sse_pack_p(cl_object x) +{ + @(return (ECL_SSE_PACK_P(x) ? Ct : Cnil)) +} + +cl_object +si_int_sse_pack_p(cl_object x) +{ + @(return (ECL_SSE_PACK_P(x) && x->sse.elttype != aet_sf && x->sse.elttype != aet_df ? Ct : Cnil)) +} + +cl_object +si_float_sse_pack_p(cl_object x) +{ + @(return (ECL_SSE_PACK_P(x) && x->sse.elttype == aet_sf ? Ct : Cnil)) +} + +cl_object +si_double_sse_pack_p(cl_object x) +{ + @(return (ECL_SSE_PACK_P(x) && x->sse.elttype == aet_df ? Ct : Cnil)) +} + +/* Element type substitution */ + +static void verify_sse_elttype(cl_elttype eltt) { + switch (eltt) { + case aet_sf: + case aet_df: + case aet_b8: + case aet_i8: +#ifdef ecl_uint16_t + case aet_b16: + case aet_i16: +#endif +#ifdef ecl_uint32_t + case aet_b32: + case aet_i32: +#endif +#ifdef ecl_uint64_t + case aet_b64: + case aet_i64: +#endif + break; /* OK */ + default: + FEerror("Invalid element type for an SSE pack: ~S", 1, ecl_elttype_to_symbol(eltt)); + } +} + +static +cl_elttype symbol_to_sse_elttype(cl_object type) { + cl_elttype eltt = ecl_symbol_to_elttype(type); + verify_sse_elttype(eltt); + return eltt; +} + +cl_object +si_sse_pack_as_elt_type(cl_object x, cl_object type) +{ + cl_elttype rtype; + + if (ecl_unlikely(!ECL_SSE_PACK_P(x))) { + FEwrong_type_nth_arg(@[ext::sse-pack-as-elt-type], 1, x, @[ext::sse-pack]); + } + + rtype = symbol_to_sse_elttype(type); + + if (x->sse.elttype != rtype) { + cl_object new = ecl_alloc_object(t_sse_pack); + new->sse.elttype = rtype; + new->sse.data.vi = x->sse.data.vi; + x = new; + } + + @(return x) +} + +/* Conversion to and from specialized vectors */ + +cl_object +si_sse_pack_to_vector(cl_object x, cl_object elt_type) +{ + cl_elttype etype; + cl_object vec; + + if (ecl_unlikely(!ECL_SSE_PACK_P(x))) { + FEwrong_type_nth_arg(@[ext::sse-pack-to-vector], 1, x, @[ext::sse-pack]); + } + + etype = x->sse.elttype; + if (elt_type != Cnil) + etype = symbol_to_sse_elttype(elt_type); + + vec = ecl_alloc_simple_vector(16/ecl_aet_size[etype], etype); + memcpy(vec->vector.self.b8, x->sse.data.b8, 16); + + @(return vec) +} + +cl_object +si_vector_to_sse_pack(cl_object x) +{ + cl_object ssev; + + if (ecl_unlikely(!ECL_ARRAYP(x))) { + FEwrong_type_nth_arg(@[ext::vector-to-sse-pack], 1, x, @[array]); + } + + verify_sse_elttype(x->vector.elttype); + + if (ecl_unlikely(x->vector.dim * ecl_aet_size[x->vector.elttype] != 16)) + FEerror("Wrong vector size in VECTOR-TO-SSE-PACK: ~S",1,MAKE_FIXNUM(x->vector.dim)); + + ssev = ecl_alloc_object(t_sse_pack); + ssev->sse.elttype = x->vector.elttype; + memcpy(ssev->sse.data.b8, x->vector.self.b8, 16); + + @(return ssev) +} + +#endif // ECL_SSE2 diff --git a/src/c/symbols_list.h b/src/c/symbols_list.h index 42a726903..4b43cd1d2 100755 --- a/src/c/symbols_list.h +++ b/src/c/symbols_list.h @@ -1928,5 +1928,19 @@ cl_symbols[] = { {SYS_ "SERIALIZE", SI_ORDINARY, si_serialize, 1, OBJNULL}, {SYS_ "DESERIALIZE", SI_ORDINARY, si_deserialize, 1, OBJNULL}, +#ifdef ECL_SSE2 +{EXT_ "SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL}, +{EXT_ "SSE-PACK-P", EXT_ORDINARY, si_sse_pack_p, 1, OBJNULL}, +{EXT_ "SSE-PACK-AS-ELT-TYPE", EXT_ORDINARY, si_sse_pack_as_elt_type, 2, OBJNULL}, +{EXT_ "VECTOR-TO-SSE-PACK", EXT_ORDINARY, si_vector_to_sse_pack, 1, OBJNULL}, +{EXT_ "SSE-PACK-TO-VECTOR", EXT_ORDINARY, si_sse_pack_to_vector, 2, OBJNULL}, +{EXT_ "INT-SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL}, +{EXT_ "INT-SSE-PACK-P", EXT_ORDINARY, si_int_sse_pack_p, 1, OBJNULL}, +{EXT_ "FLOAT-SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL}, +{EXT_ "FLOAT-SSE-PACK-P", EXT_ORDINARY, si_float_sse_pack_p, 1, OBJNULL}, +{EXT_ "DOUBLE-SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL}, +{EXT_ "DOUBLE-SSE-PACK-P", EXT_ORDINARY, si_double_sse_pack_p, 1, OBJNULL}, +#endif + /* Tag for end of list */ {NULL, CL_ORDINARY, NULL, -1, OBJNULL}}; diff --git a/src/c/symbols_list2.h b/src/c/symbols_list2.h index e98623e7c..e6a04f73d 100755 --- a/src/c/symbols_list2.h +++ b/src/c/symbols_list2.h @@ -1928,5 +1928,19 @@ cl_symbols[] = { {SYS_ "SERIALIZE","si_serialize"}, {SYS_ "DESERIALIZE","si_deserialize"}, +#ifdef ECL_SSE2 +{EXT_ "SSE-PACK",NULL}, +{EXT_ "SSE-PACK-P","si_sse_pack_p"}, +{EXT_ "SSE-PACK-AS-ELT-TYPE","si_sse_pack_as_elt_type"}, +{EXT_ "VECTOR-TO-SSE-PACK","si_vector_to_sse_pack"}, +{EXT_ "SSE-PACK-TO-VECTOR","si_sse_pack_to_vector"}, +{EXT_ "INT-SSE-PACK",NULL}, +{EXT_ "INT-SSE-PACK-P","si_int_sse_pack_p"}, +{EXT_ "FLOAT-SSE-PACK",NULL}, +{EXT_ "FLOAT-SSE-PACK-P","si_float_sse_pack_p"}, +{EXT_ "DOUBLE-SSE-PACK",NULL}, +{EXT_ "DOUBLE-SSE-PACK-P","si_double_sse_pack_p"}, +#endif + /* Tag for end of list */ {NULL,NULL}}; diff --git a/src/c/typespec.d b/src/c/typespec.d index fd27f74ee..d38cbd49a 100644 --- a/src/c/typespec.d +++ b/src/c/typespec.d @@ -162,6 +162,10 @@ ecl_type_to_symbol(cl_type t) return @'si::frame'; case t_weak_pointer: return @'ext::weak-pointer'; +#ifdef ECL_SSE2 + case t_sse_pack: + return @'ext::sse-pack'; +#endif default: ecl_internal_error("not a lisp data object"); } @@ -322,6 +326,15 @@ cl_type_of(cl_object x) case t_list: t = Null(x) ? @'null' : @'cons'; break; +#ifdef ECL_SSE2 + case t_sse_pack: + switch (x->sse.elttype) { + case aet_sf: t = @'ext::float-sse-pack'; break; + case aet_df: t = @'ext::double-sse-pack'; break; + default: t = @'ext::int-sse-pack'; break; + } + break; +#endif default: t = ecl_type_to_symbol(tx); } diff --git a/src/clos/builtin.lsp b/src/clos/builtin.lsp index 962b4344a..22705b21e 100644 --- a/src/clos/builtin.lsp +++ b/src/clos/builtin.lsp @@ -86,7 +86,8 @@ #+threads (mp::process) #+threads (mp::lock) #+threads (mp::condition-variable) - #+semaphores (mp::semaphore)))) + #+semaphores (mp::semaphore) + #+sse2 (ext::sse-pack)))) (loop for (name . rest) in '#.+builtin-classes+ with index = 1 diff --git a/src/h/config.h.in b/src/h/config.h.in index 67030d360..b0bb279e7 100644 --- a/src/h/config.h.in +++ b/src/h/config.h.in @@ -253,6 +253,9 @@ typedef unsigned @CL_FIXNUM_TYPE@ cl_hashkey; # endif #endif +#if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) +#define ECL_SSE2 +#endif /* -CUT-: Everything below this mark will not be installed */ /* -------------------------------------------------------------------- * diff --git a/src/h/ecl.h b/src/h/ecl.h index 63270895a..227b36b05 100644 --- a/src/h/ecl.h +++ b/src/h/ecl.h @@ -72,6 +72,11 @@ # endif /* ECL_THREADS */ #endif /* _MSC_VER || __MINGW32__ */ +#ifdef ECL_SSE2 +#include +#include +#endif + #include #include #include diff --git a/src/h/external.h b/src/h/external.h index 2d3894d91..59aafd04a 100755 --- a/src/h/external.h +++ b/src/h/external.h @@ -1503,6 +1503,19 @@ extern ECL_API cl_fixnum ecl_length(cl_object x); extern ECL_API cl_object ecl_subseq(cl_object seq, cl_index start, cl_index limit); extern ECL_API cl_object ecl_copy_seq(cl_object seq); +#ifdef ECL_SSE2 +/* sse2.c */ + +extern ECL_API cl_object si_sse_pack_p(cl_object x); +extern ECL_API cl_object si_int_sse_pack_p(cl_object x); +extern ECL_API cl_object si_float_sse_pack_p(cl_object x); +extern ECL_API cl_object si_double_sse_pack_p(cl_object x); +extern ECL_API cl_object si_sse_pack_as_elt_type(cl_object x, cl_object type); + +extern ECL_API cl_object si_vector_to_sse_pack(cl_object x); +extern ECL_API cl_object si_sse_pack_to_vector(cl_object x, cl_object elt_type); + +#endif /* stacks.c */ diff --git a/src/h/object.h b/src/h/object.h index 494037ed2..5224851d1 100644 --- a/src/h/object.h +++ b/src/h/object.h @@ -87,6 +87,9 @@ typedef enum { t_foreign, t_frame, t_weak_pointer, +#ifdef ECL_SSE2 + t_sse_pack, +#endif t_end, t_other, t_contiguous, /* contiguous block */ @@ -179,6 +182,9 @@ typedef cl_object (*cl_objectfn_fixed)(); #define ECL_PATHNAMEP(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_pathname)) #define ECL_READTABLEP(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_readtable)) #define ECL_FOREIGN_DATA_P(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_foreign)) +#ifdef ECL_SSE2 +#define ECL_SSE_PACK_P(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_sse_pack)) +#endif #define HEADER int8_t t, m, padding1, padding2 #define HEADER1(field) int8_t t, m, field, padding @@ -939,6 +945,36 @@ struct ecl_instance { /* instance header */ }; #endif /* CLOS */ +#ifdef ECL_SSE2 +union ecl_sse_data { + __m128 vf; + __m128i vi; + __m128d vd; + + uint8_t b8[16]; + int8_t i8[16]; +#ifdef ecl_uint16_t + ecl_uint16_t b16[8]; + ecl_int16_t i16[8]; +#endif +#ifdef ecl_uint32_t + ecl_uint32_t b32[4]; + ecl_int32_t i32[4]; +#endif +#ifdef ecl_uint64_t + ecl_uint64_t b64[2]; + ecl_int64_t i64[2]; +#endif + float sf[4]; + double df[2]; +}; + +struct ecl_sse_pack { + HEADER1(elttype); + union ecl_sse_data data; +}; +#endif + /* Definition of lispunion. */ @@ -991,6 +1027,9 @@ union cl_lispunion { struct ecl_foreign foreign; /* user defined data type */ struct ecl_stack_frame frame; /* stack frame */ struct ecl_weak_pointer weak; /* weak pointers */ +#ifdef ECL_SSE2 + struct ecl_sse_pack sse; +#endif }; /* diff --git a/src/lsp/predlib.lsp b/src/lsp/predlib.lsp index 1f717fdca..8a58391b4 100644 --- a/src/lsp/predlib.lsp +++ b/src/lsp/predlib.lsp @@ -342,6 +342,10 @@ and is not adjustable." (STRING . STRINGP) (STRUCTURE . SYS:STRUCTUREP) (SYMBOL . SYMBOLP) + #+sse2 (EXT:SSE-PACK . EXT:SSE-PACK-P) + #+sse2 (EXT:INT-SSE-PACK . EXT:INT-SSE-PACK-P) + #+sse2 (EXT:FLOAT-SSE-PACK . EXT:FLOAT-SSE-PACK-P) + #+sse2 (EXT:DOUBLE-SSE-PACK . EXT:DOUBLE-SSE-PACK-P) (T . CONSTANTLY-T) (VECTOR . VECTORP)))) @@ -650,6 +654,14 @@ if not possible." (FUNCTION (coerce-to-function object)) ((VECTOR SIMPLE-VECTOR #+unicode SIMPLE-BASE-STRING SIMPLE-STRING #+unicode BASE-STRING STRING BIT-VECTOR SIMPLE-BIT-VECTOR) (concatenate type object)) + #+sse2 + ((EXT:INT-SSE-PACK EXT:FLOAT-SSE-PACK EXT:DOUBLE-SSE-PACK) + (if (ext:sse-pack-p object) + (ext:sse-pack-as-elt-type object (case type + (EXT:INT-SSE-PACK '(unsigned-byte 8)) + (EXT:FLOAT-SSE-PACK 'single-float) + (EXT:DOUBLE-SSE-PACK 'double-float))) + (error-coerce object type))) (t (if (or (listp object) (vector object)) (concatenate type object) @@ -1219,6 +1231,12 @@ if not possible." #+threads (MP::PROCESS) #+threads (MP::LOCK) #+ffi (FOREIGN-DATA) + #+sse2 (EXT:SSE-PACK (OR EXT:INT-SSE-PACK + EXT:FLOAT-SSE-PACK + EXT:DOUBLE-SSE-PACK)) + #+sse2 (EXT:INT-SSE-PACK) + #+sse2 (EXT:FLOAT-SSE-PACK) + #+sse2 (EXT:DOUBLE-SSE-PACK) (CODE-BLOCK) ))