From ce2badff9070303146e4277970993aeba6eacfe7 Mon Sep 17 00:00:00 2001 From: Alexander Gavrilov Date: Sat, 7 Aug 2010 17:17:11 +0400 Subject: [PATCH] Add the boxed SSE packed vector types. Boxing SSE values is useless performance-wise, but necessary to provide continuity between compiled and interpreted code. This set of types is peculiar in that while the actual CPU instruction set mostly does not care about the data types (although mixing commands for ints and floats leads to some performance degradation), the C intrinsic function interface does distinguish the types to some extent. Thus it also has to be represented in the ECL compiler. --- src/c/Makefile.in | 2 +- src/c/alloc.d | 3 + src/c/alloc_2.d | 9 +++ src/c/gbc-new.d | 3 + src/c/gbc.d | 3 + src/c/hash.d | 4 ++ src/c/instance.d | 7 ++ src/c/main.d | 3 + src/c/predicate.d | 7 ++ src/c/print.d | 68 +++++++++++++++++++ src/c/serialize.d | 3 + src/c/sse2.d | 149 ++++++++++++++++++++++++++++++++++++++++++ src/c/symbols_list.h | 14 ++++ src/c/symbols_list2.h | 14 ++++ src/c/typespec.d | 13 ++++ src/clos/builtin.lsp | 3 +- src/h/config.h.in | 3 + src/h/ecl.h | 5 ++ src/h/external.h | 13 ++++ src/h/object.h | 39 +++++++++++ src/lsp/predlib.lsp | 18 +++++ 21 files changed, 381 insertions(+), 2 deletions(-) create mode 100644 src/c/sse2.d diff --git a/src/c/Makefile.in b/src/c/Makefile.in index 86e48dfa1..48c49017d 100644 --- a/src/c/Makefile.in +++ b/src/c/Makefile.in @@ -50,7 +50,7 @@ OBJS = main.o symbol.o package.o list.o\ time.o unixint.o\ mapfun.o multival.o hash.o format.o pathname.o\ structure.o load.o unixfsys.o unixsys.o \ - serialize.o ffi.o @EXTRA_OBJS@ + serialize.o ffi.o sse2.o @EXTRA_OBJS@ .SUFFIXES: .c .o .d .s .PHONY: all diff --git a/src/c/alloc.d b/src/c/alloc.d index d9d646d82..06fbea24c 100644 --- a/src/c/alloc.d +++ b/src/c/alloc.d @@ -276,6 +276,9 @@ ONCE_MORE: obj->ratio.num = OBJNULL; obj->ratio.den = OBJNULL; break; +#ifdef ECL_SSE2 + case t_sse_pack: +#endif case t_singlefloat: case t_doublefloat: #ifdef ECL_LONG_FLOAT diff --git a/src/c/alloc_2.d b/src/c/alloc_2.d index 02b666dc3..1c5224210 100755 --- a/src/c/alloc_2.d +++ b/src/c/alloc_2.d @@ -495,6 +495,9 @@ ecl_alloc_object(cl_type t) return MAKE_FIXNUM(0); /* Immediate fixnum */ case t_character: return CODE_CHAR(' '); /* Immediate character */ +#ifdef ECL_SSE2 + case t_sse_pack: +#endif #ifdef ECL_LONG_FLOAT case t_longfloat: #endif @@ -821,6 +824,9 @@ init_alloc(void) init_tm(t_foreign, "FOREIGN", sizeof(struct ecl_foreign), 2); init_tm(t_frame, "STACK-FRAME", sizeof(struct ecl_stack_frame), 2); init_tm(t_weak_pointer, "WEAK-POINTER", sizeof(struct ecl_weak_pointer), 0); +#ifdef ECL_SSE2 + init_tm(t_sse_pack, "SSE-PACK", sizeof(struct ecl_sse_pack), 0); +#endif #ifdef GBC_BOEHM_PRECISE type_info[t_list].descriptor = to_bitmap(&c, &(c.car)) | @@ -968,6 +974,9 @@ init_alloc(void) to_bitmap(&o, &(o.frame.base)) | to_bitmap(&o, &(o.frame.env)); type_info[t_weak_pointer].descriptor = 0; +#ifdef ECL_SSE2 + type_info[t_sse_pack].descriptor = 0; +#endif for (i = 0; i < t_end; i++) { GC_word descriptor = type_info[i].descriptor; int bits = type_info[i].size / sizeof(GC_word); diff --git a/src/c/gbc-new.d b/src/c/gbc-new.d index b9bcc70e9..c8fa49612 100644 --- a/src/c/gbc-new.d +++ b/src/c/gbc-new.d @@ -153,6 +153,9 @@ BEGIN: mark_next(x->ratio.den); break; +#ifdef ECL_SSE2 + case t_sse_pack: +#endif case t_singlefloat: case t_doublefloat: break; diff --git a/src/c/gbc.d b/src/c/gbc.d index 1ca896df0..c4d4801f0 100644 --- a/src/c/gbc.d +++ b/src/c/gbc.d @@ -153,6 +153,9 @@ BEGIN: mark_next(x->ratio.den); break; +#ifdef ECL_SSE2 + case t_sse_pack: +#endif case t_singlefloat: case t_doublefloat: #ifdef ECL_LONG_FLOAT diff --git a/src/c/hash.d b/src/c/hash.d index 107b74e1d..34b857799 100644 --- a/src/c/hash.d +++ b/src/c/hash.d @@ -72,6 +72,10 @@ _hash_eql(cl_hashkey h, cl_object x) return _hash_eql(h, x->complex.imag); case t_character: return hash_word(h, CHAR_CODE(x)); +#ifdef ECL_SSE2 + case t_sse_pack: + return hash_string(h, x->sse.data.b8, 16); +#endif default: return hash_word(h, ((cl_hashkey)x >> 2)); } diff --git a/src/c/instance.d b/src/c/instance.d index f4ca7fa0a..0f7d6ddc7 100644 --- a/src/c/instance.d +++ b/src/c/instance.d @@ -265,6 +265,9 @@ enum ecl_built_in_classes { #ifdef ECL_SEMAPHORES , ECL_BUILTIN_SEMAPHORE #endif +#ifdef ECL_SSE2 + , ECL_BUILTIN_SSE_PACK +#endif }; cl_object @@ -363,6 +366,10 @@ cl_class_of(cl_object x) index = ECL_BUILTIN_FRAME; break; case t_weak_pointer: index = ECL_BUILTIN_WEAK_POINTER; break; +#ifdef ECL_SSE2 + case t_sse_pack: + index = ECL_BUILTIN_SSE_PACK; break; +#endif default: ecl_internal_error("not a lisp data object"); } diff --git a/src/c/main.d b/src/c/main.d index 758ee429b..12144c92c 100644 --- a/src/c/main.d +++ b/src/c/main.d @@ -181,6 +181,9 @@ static const char *feature_names[] = { #endif #ifdef __cplusplus "C++", +#endif +#ifdef ECL_SSE2 + "SSE2", #endif 0 }; diff --git a/src/c/predicate.d b/src/c/predicate.d index 268a4883a..b5c0bdbd7 100644 --- a/src/c/predicate.d +++ b/src/c/predicate.d @@ -292,6 +292,13 @@ ecl_eql(cl_object x, cl_object y) case t_complex: return (ecl_eql(x->complex.real, y->complex.real) && ecl_eql(x->complex.imag, y->complex.imag)); +#ifdef ECL_SSE2 + case t_sse_pack: + return (x->sse.elttype == y->sse.elttype || + (x->sse.elttype != aet_sf && x->sse.elttype != aet_df && + y->sse.elttype != aet_sf && y->sse.elttype != aet_df)) + && !memcmp(x->sse.data.b8, y->sse.data.b8, 16); +#endif default: return FALSE; } diff --git a/src/c/print.d b/src/c/print.d index 524d333a6..83e3e659a 100644 --- a/src/c/print.d +++ b/src/c/print.d @@ -1035,6 +1035,66 @@ write_array(bool vector, cl_object x, cl_object stream) } } +#ifdef ECL_SSE2 +static int +is_all_FF(void *ptr, int size) { + int i; + for (i = 0; i < size; i++) + if (((unsigned char*)ptr)[i] != 0xFF) + return 0; + return 1; +} + +static void +write_sse_float(float v, cl_object stream) +{ + if (is_all_FF(&v, sizeof(float))) + write_str(" TRUE", stream); + else { + char buf[60]; + sprintf(buf, " %g", v); + write_str(buf, stream); + } +} + +static void +write_sse_double(double v, cl_object stream) +{ + if (is_all_FF(&v, sizeof(double))) + write_str(" TRUE", stream); + else { + char buf[60]; + sprintf(buf, " %lg", v); + write_str(buf, stream); + } +} + +static void +write_sse_pack(cl_object x, cl_object stream) +{ + int i; + + switch (x->sse.elttype) { + case aet_sf: + for (i = 0; i < 4; i++) + write_sse_float(x->sse.data.sf[i], stream); + break; + case aet_df: + write_sse_double(x->sse.data.df[0], stream); + write_sse_double(x->sse.data.df[1], stream); + break; + default: + for (i = 0; i < 16; i++) { + char buf[10]; + int pad = 1 + (i%4 == 0); + sprintf(buf, "%*c%02x", pad, ' ', x->sse.data.b8[i]); + write_str(buf, stream); + } + break; + } +} +#endif + cl_object si_write_ugly_object(cl_object x, cl_object stream) { @@ -1619,6 +1679,14 @@ si_write_ugly_object(cl_object x, cl_object stream) write_addr(x, stream); write_ch('>', stream); break; +#endif +#ifdef ECL_SSE2 + case t_sse_pack: + if (ecl_print_readably()) FEprint_not_readable(x); + write_str("#', stream); + break; #endif default: if (ecl_print_readably()) FEprint_not_readable(x); diff --git a/src/c/serialize.d b/src/c/serialize.d index 3e1b35e50..6f73150a8 100644 --- a/src/c/serialize.d +++ b/src/c/serialize.d @@ -83,6 +83,9 @@ static cl_index object_size[] = { ROUNDED_SIZE(ecl_foreign), /* t_foreign */ ROUNDED_SIZE(ecl_frame), /* t_frame */ ROUNDED_SIZE(ecl_weak_pointer) /* t_weak_pointer */ +#ifdef ECL_SSE2 + , ROUNDED_SIZE(ecl_sse_pack) /* t_sse_pack */ +#endif }; typedef struct pool { diff --git a/src/c/sse2.d b/src/c/sse2.d new file mode 100644 index 000000000..ac253200b --- /dev/null +++ b/src/c/sse2.d @@ -0,0 +1,149 @@ +/* -*- mode: c; c-basic-offset: 8 -*- */ +/* + sse2.c -- SSE2 vector type support +*/ +/* + Copyright (c) 1984, Taiichi Yuasa and Masami Hagiya. + Copyright (c) 1990, Giuseppe Attardi. + Copyright (c) 2001, Juan Jose Garcia Ripoll. + + ECL is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + See file '../Copyright' for full details. +*/ + +#include +#include +#include +#define ECL_DEFINE_AET_SIZE +#include + +#ifdef ECL_SSE2 + +/* Predicates */ + +cl_object +si_sse_pack_p(cl_object x) +{ + @(return (ECL_SSE_PACK_P(x) ? Ct : Cnil)) +} + +cl_object +si_int_sse_pack_p(cl_object x) +{ + @(return (ECL_SSE_PACK_P(x) && x->sse.elttype != aet_sf && x->sse.elttype != aet_df ? Ct : Cnil)) +} + +cl_object +si_float_sse_pack_p(cl_object x) +{ + @(return (ECL_SSE_PACK_P(x) && x->sse.elttype == aet_sf ? Ct : Cnil)) +} + +cl_object +si_double_sse_pack_p(cl_object x) +{ + @(return (ECL_SSE_PACK_P(x) && x->sse.elttype == aet_df ? Ct : Cnil)) +} + +/* Element type substitution */ + +static void verify_sse_elttype(cl_elttype eltt) { + switch (eltt) { + case aet_sf: + case aet_df: + case aet_b8: + case aet_i8: +#ifdef ecl_uint16_t + case aet_b16: + case aet_i16: +#endif +#ifdef ecl_uint32_t + case aet_b32: + case aet_i32: +#endif +#ifdef ecl_uint64_t + case aet_b64: + case aet_i64: +#endif + break; /* OK */ + default: + FEerror("Invalid element type for an SSE pack: ~S", 1, ecl_elttype_to_symbol(eltt)); + } +} + +static +cl_elttype symbol_to_sse_elttype(cl_object type) { + cl_elttype eltt = ecl_symbol_to_elttype(type); + verify_sse_elttype(eltt); + return eltt; +} + +cl_object +si_sse_pack_as_elt_type(cl_object x, cl_object type) +{ + cl_elttype rtype; + + if (ecl_unlikely(!ECL_SSE_PACK_P(x))) { + FEwrong_type_nth_arg(@[ext::sse-pack-as-elt-type], 1, x, @[ext::sse-pack]); + } + + rtype = symbol_to_sse_elttype(type); + + if (x->sse.elttype != rtype) { + cl_object new = ecl_alloc_object(t_sse_pack); + new->sse.elttype = rtype; + new->sse.data.vi = x->sse.data.vi; + x = new; + } + + @(return x) +} + +/* Conversion to and from specialized vectors */ + +cl_object +si_sse_pack_to_vector(cl_object x, cl_object elt_type) +{ + cl_elttype etype; + cl_object vec; + + if (ecl_unlikely(!ECL_SSE_PACK_P(x))) { + FEwrong_type_nth_arg(@[ext::sse-pack-to-vector], 1, x, @[ext::sse-pack]); + } + + etype = x->sse.elttype; + if (elt_type != Cnil) + etype = symbol_to_sse_elttype(elt_type); + + vec = ecl_alloc_simple_vector(16/ecl_aet_size[etype], etype); + memcpy(vec->vector.self.b8, x->sse.data.b8, 16); + + @(return vec) +} + +cl_object +si_vector_to_sse_pack(cl_object x) +{ + cl_object ssev; + + if (ecl_unlikely(!ECL_ARRAYP(x))) { + FEwrong_type_nth_arg(@[ext::vector-to-sse-pack], 1, x, @[array]); + } + + verify_sse_elttype(x->vector.elttype); + + if (ecl_unlikely(x->vector.dim * ecl_aet_size[x->vector.elttype] != 16)) + FEerror("Wrong vector size in VECTOR-TO-SSE-PACK: ~S",1,MAKE_FIXNUM(x->vector.dim)); + + ssev = ecl_alloc_object(t_sse_pack); + ssev->sse.elttype = x->vector.elttype; + memcpy(ssev->sse.data.b8, x->vector.self.b8, 16); + + @(return ssev) +} + +#endif // ECL_SSE2 diff --git a/src/c/symbols_list.h b/src/c/symbols_list.h index 42a726903..4b43cd1d2 100755 --- a/src/c/symbols_list.h +++ b/src/c/symbols_list.h @@ -1928,5 +1928,19 @@ cl_symbols[] = { {SYS_ "SERIALIZE", SI_ORDINARY, si_serialize, 1, OBJNULL}, {SYS_ "DESERIALIZE", SI_ORDINARY, si_deserialize, 1, OBJNULL}, +#ifdef ECL_SSE2 +{EXT_ "SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL}, +{EXT_ "SSE-PACK-P", EXT_ORDINARY, si_sse_pack_p, 1, OBJNULL}, +{EXT_ "SSE-PACK-AS-ELT-TYPE", EXT_ORDINARY, si_sse_pack_as_elt_type, 2, OBJNULL}, +{EXT_ "VECTOR-TO-SSE-PACK", EXT_ORDINARY, si_vector_to_sse_pack, 1, OBJNULL}, +{EXT_ "SSE-PACK-TO-VECTOR", EXT_ORDINARY, si_sse_pack_to_vector, 2, OBJNULL}, +{EXT_ "INT-SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL}, +{EXT_ "INT-SSE-PACK-P", EXT_ORDINARY, si_int_sse_pack_p, 1, OBJNULL}, +{EXT_ "FLOAT-SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL}, +{EXT_ "FLOAT-SSE-PACK-P", EXT_ORDINARY, si_float_sse_pack_p, 1, OBJNULL}, +{EXT_ "DOUBLE-SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL}, +{EXT_ "DOUBLE-SSE-PACK-P", EXT_ORDINARY, si_double_sse_pack_p, 1, OBJNULL}, +#endif + /* Tag for end of list */ {NULL, CL_ORDINARY, NULL, -1, OBJNULL}}; diff --git a/src/c/symbols_list2.h b/src/c/symbols_list2.h index e98623e7c..e6a04f73d 100755 --- a/src/c/symbols_list2.h +++ b/src/c/symbols_list2.h @@ -1928,5 +1928,19 @@ cl_symbols[] = { {SYS_ "SERIALIZE","si_serialize"}, {SYS_ "DESERIALIZE","si_deserialize"}, +#ifdef ECL_SSE2 +{EXT_ "SSE-PACK",NULL}, +{EXT_ "SSE-PACK-P","si_sse_pack_p"}, +{EXT_ "SSE-PACK-AS-ELT-TYPE","si_sse_pack_as_elt_type"}, +{EXT_ "VECTOR-TO-SSE-PACK","si_vector_to_sse_pack"}, +{EXT_ "SSE-PACK-TO-VECTOR","si_sse_pack_to_vector"}, +{EXT_ "INT-SSE-PACK",NULL}, +{EXT_ "INT-SSE-PACK-P","si_int_sse_pack_p"}, +{EXT_ "FLOAT-SSE-PACK",NULL}, +{EXT_ "FLOAT-SSE-PACK-P","si_float_sse_pack_p"}, +{EXT_ "DOUBLE-SSE-PACK",NULL}, +{EXT_ "DOUBLE-SSE-PACK-P","si_double_sse_pack_p"}, +#endif + /* Tag for end of list */ {NULL,NULL}}; diff --git a/src/c/typespec.d b/src/c/typespec.d index fd27f74ee..d38cbd49a 100644 --- a/src/c/typespec.d +++ b/src/c/typespec.d @@ -162,6 +162,10 @@ ecl_type_to_symbol(cl_type t) return @'si::frame'; case t_weak_pointer: return @'ext::weak-pointer'; +#ifdef ECL_SSE2 + case t_sse_pack: + return @'ext::sse-pack'; +#endif default: ecl_internal_error("not a lisp data object"); } @@ -322,6 +326,15 @@ cl_type_of(cl_object x) case t_list: t = Null(x) ? @'null' : @'cons'; break; +#ifdef ECL_SSE2 + case t_sse_pack: + switch (x->sse.elttype) { + case aet_sf: t = @'ext::float-sse-pack'; break; + case aet_df: t = @'ext::double-sse-pack'; break; + default: t = @'ext::int-sse-pack'; break; + } + break; +#endif default: t = ecl_type_to_symbol(tx); } diff --git a/src/clos/builtin.lsp b/src/clos/builtin.lsp index 962b4344a..22705b21e 100644 --- a/src/clos/builtin.lsp +++ b/src/clos/builtin.lsp @@ -86,7 +86,8 @@ #+threads (mp::process) #+threads (mp::lock) #+threads (mp::condition-variable) - #+semaphores (mp::semaphore)))) + #+semaphores (mp::semaphore) + #+sse2 (ext::sse-pack)))) (loop for (name . rest) in '#.+builtin-classes+ with index = 1 diff --git a/src/h/config.h.in b/src/h/config.h.in index 67030d360..b0bb279e7 100644 --- a/src/h/config.h.in +++ b/src/h/config.h.in @@ -253,6 +253,9 @@ typedef unsigned @CL_FIXNUM_TYPE@ cl_hashkey; # endif #endif +#if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) +#define ECL_SSE2 +#endif /* -CUT-: Everything below this mark will not be installed */ /* -------------------------------------------------------------------- * diff --git a/src/h/ecl.h b/src/h/ecl.h index 63270895a..227b36b05 100644 --- a/src/h/ecl.h +++ b/src/h/ecl.h @@ -72,6 +72,11 @@ # endif /* ECL_THREADS */ #endif /* _MSC_VER || __MINGW32__ */ +#ifdef ECL_SSE2 +#include +#include +#endif + #include #include #include diff --git a/src/h/external.h b/src/h/external.h index 2d3894d91..59aafd04a 100755 --- a/src/h/external.h +++ b/src/h/external.h @@ -1503,6 +1503,19 @@ extern ECL_API cl_fixnum ecl_length(cl_object x); extern ECL_API cl_object ecl_subseq(cl_object seq, cl_index start, cl_index limit); extern ECL_API cl_object ecl_copy_seq(cl_object seq); +#ifdef ECL_SSE2 +/* sse2.c */ + +extern ECL_API cl_object si_sse_pack_p(cl_object x); +extern ECL_API cl_object si_int_sse_pack_p(cl_object x); +extern ECL_API cl_object si_float_sse_pack_p(cl_object x); +extern ECL_API cl_object si_double_sse_pack_p(cl_object x); +extern ECL_API cl_object si_sse_pack_as_elt_type(cl_object x, cl_object type); + +extern ECL_API cl_object si_vector_to_sse_pack(cl_object x); +extern ECL_API cl_object si_sse_pack_to_vector(cl_object x, cl_object elt_type); + +#endif /* stacks.c */ diff --git a/src/h/object.h b/src/h/object.h index 494037ed2..5224851d1 100644 --- a/src/h/object.h +++ b/src/h/object.h @@ -87,6 +87,9 @@ typedef enum { t_foreign, t_frame, t_weak_pointer, +#ifdef ECL_SSE2 + t_sse_pack, +#endif t_end, t_other, t_contiguous, /* contiguous block */ @@ -179,6 +182,9 @@ typedef cl_object (*cl_objectfn_fixed)(); #define ECL_PATHNAMEP(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_pathname)) #define ECL_READTABLEP(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_readtable)) #define ECL_FOREIGN_DATA_P(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_foreign)) +#ifdef ECL_SSE2 +#define ECL_SSE_PACK_P(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_sse_pack)) +#endif #define HEADER int8_t t, m, padding1, padding2 #define HEADER1(field) int8_t t, m, field, padding @@ -939,6 +945,36 @@ struct ecl_instance { /* instance header */ }; #endif /* CLOS */ +#ifdef ECL_SSE2 +union ecl_sse_data { + __m128 vf; + __m128i vi; + __m128d vd; + + uint8_t b8[16]; + int8_t i8[16]; +#ifdef ecl_uint16_t + ecl_uint16_t b16[8]; + ecl_int16_t i16[8]; +#endif +#ifdef ecl_uint32_t + ecl_uint32_t b32[4]; + ecl_int32_t i32[4]; +#endif +#ifdef ecl_uint64_t + ecl_uint64_t b64[2]; + ecl_int64_t i64[2]; +#endif + float sf[4]; + double df[2]; +}; + +struct ecl_sse_pack { + HEADER1(elttype); + union ecl_sse_data data; +}; +#endif + /* Definition of lispunion. */ @@ -991,6 +1027,9 @@ union cl_lispunion { struct ecl_foreign foreign; /* user defined data type */ struct ecl_stack_frame frame; /* stack frame */ struct ecl_weak_pointer weak; /* weak pointers */ +#ifdef ECL_SSE2 + struct ecl_sse_pack sse; +#endif }; /* diff --git a/src/lsp/predlib.lsp b/src/lsp/predlib.lsp index 1f717fdca..8a58391b4 100644 --- a/src/lsp/predlib.lsp +++ b/src/lsp/predlib.lsp @@ -342,6 +342,10 @@ and is not adjustable." (STRING . STRINGP) (STRUCTURE . SYS:STRUCTUREP) (SYMBOL . SYMBOLP) + #+sse2 (EXT:SSE-PACK . EXT:SSE-PACK-P) + #+sse2 (EXT:INT-SSE-PACK . EXT:INT-SSE-PACK-P) + #+sse2 (EXT:FLOAT-SSE-PACK . EXT:FLOAT-SSE-PACK-P) + #+sse2 (EXT:DOUBLE-SSE-PACK . EXT:DOUBLE-SSE-PACK-P) (T . CONSTANTLY-T) (VECTOR . VECTORP)))) @@ -650,6 +654,14 @@ if not possible." (FUNCTION (coerce-to-function object)) ((VECTOR SIMPLE-VECTOR #+unicode SIMPLE-BASE-STRING SIMPLE-STRING #+unicode BASE-STRING STRING BIT-VECTOR SIMPLE-BIT-VECTOR) (concatenate type object)) + #+sse2 + ((EXT:INT-SSE-PACK EXT:FLOAT-SSE-PACK EXT:DOUBLE-SSE-PACK) + (if (ext:sse-pack-p object) + (ext:sse-pack-as-elt-type object (case type + (EXT:INT-SSE-PACK '(unsigned-byte 8)) + (EXT:FLOAT-SSE-PACK 'single-float) + (EXT:DOUBLE-SSE-PACK 'double-float))) + (error-coerce object type))) (t (if (or (listp object) (vector object)) (concatenate type object) @@ -1219,6 +1231,12 @@ if not possible." #+threads (MP::PROCESS) #+threads (MP::LOCK) #+ffi (FOREIGN-DATA) + #+sse2 (EXT:SSE-PACK (OR EXT:INT-SSE-PACK + EXT:FLOAT-SSE-PACK + EXT:DOUBLE-SSE-PACK)) + #+sse2 (EXT:INT-SSE-PACK) + #+sse2 (EXT:FLOAT-SSE-PACK) + #+sse2 (EXT:DOUBLE-SSE-PACK) (CODE-BLOCK) ))