Add the boxed SSE packed vector types.

Boxing SSE values is useless performance-wise, but necessary
to provide continuity between compiled and interpreted code.

This set of types is peculiar in that while the actual CPU
instruction set mostly does not care about the data types
(although mixing commands for ints and floats leads to
some performance degradation), the C intrinsic function
interface does distinguish the types to some extent.
Thus it also has to be represented in the ECL compiler.
This commit is contained in:
Alexander Gavrilov 2010-08-07 17:17:11 +04:00 committed by Juan Jose Garcia Ripoll
parent 7178a288a6
commit ce2badff90
21 changed files with 381 additions and 2 deletions

View file

@ -50,7 +50,7 @@ OBJS = main.o symbol.o package.o list.o\
time.o unixint.o\
mapfun.o multival.o hash.o format.o pathname.o\
structure.o load.o unixfsys.o unixsys.o \
serialize.o ffi.o @EXTRA_OBJS@
serialize.o ffi.o sse2.o @EXTRA_OBJS@
.SUFFIXES: .c .o .d .s
.PHONY: all

View file

@ -276,6 +276,9 @@ ONCE_MORE:
obj->ratio.num = OBJNULL;
obj->ratio.den = OBJNULL;
break;
#ifdef ECL_SSE2
case t_sse_pack:
#endif
case t_singlefloat:
case t_doublefloat:
#ifdef ECL_LONG_FLOAT

View file

@ -495,6 +495,9 @@ ecl_alloc_object(cl_type t)
return MAKE_FIXNUM(0); /* Immediate fixnum */
case t_character:
return CODE_CHAR(' '); /* Immediate character */
#ifdef ECL_SSE2
case t_sse_pack:
#endif
#ifdef ECL_LONG_FLOAT
case t_longfloat:
#endif
@ -821,6 +824,9 @@ init_alloc(void)
init_tm(t_foreign, "FOREIGN", sizeof(struct ecl_foreign), 2);
init_tm(t_frame, "STACK-FRAME", sizeof(struct ecl_stack_frame), 2);
init_tm(t_weak_pointer, "WEAK-POINTER", sizeof(struct ecl_weak_pointer), 0);
#ifdef ECL_SSE2
init_tm(t_sse_pack, "SSE-PACK", sizeof(struct ecl_sse_pack), 0);
#endif
#ifdef GBC_BOEHM_PRECISE
type_info[t_list].descriptor =
to_bitmap(&c, &(c.car)) |
@ -968,6 +974,9 @@ init_alloc(void)
to_bitmap(&o, &(o.frame.base)) |
to_bitmap(&o, &(o.frame.env));
type_info[t_weak_pointer].descriptor = 0;
#ifdef ECL_SSE2
type_info[t_sse_pack].descriptor = 0;
#endif
for (i = 0; i < t_end; i++) {
GC_word descriptor = type_info[i].descriptor;
int bits = type_info[i].size / sizeof(GC_word);

View file

@ -153,6 +153,9 @@ BEGIN:
mark_next(x->ratio.den);
break;
#ifdef ECL_SSE2
case t_sse_pack:
#endif
case t_singlefloat:
case t_doublefloat:
break;

View file

@ -153,6 +153,9 @@ BEGIN:
mark_next(x->ratio.den);
break;
#ifdef ECL_SSE2
case t_sse_pack:
#endif
case t_singlefloat:
case t_doublefloat:
#ifdef ECL_LONG_FLOAT

View file

@ -72,6 +72,10 @@ _hash_eql(cl_hashkey h, cl_object x)
return _hash_eql(h, x->complex.imag);
case t_character:
return hash_word(h, CHAR_CODE(x));
#ifdef ECL_SSE2
case t_sse_pack:
return hash_string(h, x->sse.data.b8, 16);
#endif
default:
return hash_word(h, ((cl_hashkey)x >> 2));
}

View file

@ -265,6 +265,9 @@ enum ecl_built_in_classes {
#ifdef ECL_SEMAPHORES
, ECL_BUILTIN_SEMAPHORE
#endif
#ifdef ECL_SSE2
, ECL_BUILTIN_SSE_PACK
#endif
};
cl_object
@ -363,6 +366,10 @@ cl_class_of(cl_object x)
index = ECL_BUILTIN_FRAME; break;
case t_weak_pointer:
index = ECL_BUILTIN_WEAK_POINTER; break;
#ifdef ECL_SSE2
case t_sse_pack:
index = ECL_BUILTIN_SSE_PACK; break;
#endif
default:
ecl_internal_error("not a lisp data object");
}

View file

@ -181,6 +181,9 @@ static const char *feature_names[] = {
#endif
#ifdef __cplusplus
"C++",
#endif
#ifdef ECL_SSE2
"SSE2",
#endif
0
};

View file

@ -292,6 +292,13 @@ ecl_eql(cl_object x, cl_object y)
case t_complex:
return (ecl_eql(x->complex.real, y->complex.real) &&
ecl_eql(x->complex.imag, y->complex.imag));
#ifdef ECL_SSE2
case t_sse_pack:
return (x->sse.elttype == y->sse.elttype ||
(x->sse.elttype != aet_sf && x->sse.elttype != aet_df &&
y->sse.elttype != aet_sf && y->sse.elttype != aet_df))
&& !memcmp(x->sse.data.b8, y->sse.data.b8, 16);
#endif
default:
return FALSE;
}

View file

@ -1035,6 +1035,66 @@ write_array(bool vector, cl_object x, cl_object stream)
}
}
#ifdef ECL_SSE2
static int
is_all_FF(void *ptr, int size) {
int i;
for (i = 0; i < size; i++)
if (((unsigned char*)ptr)[i] != 0xFF)
return 0;
return 1;
}
static void
write_sse_float(float v, cl_object stream)
{
if (is_all_FF(&v, sizeof(float)))
write_str(" TRUE", stream);
else {
char buf[60];
sprintf(buf, " %g", v);
write_str(buf, stream);
}
}
static void
write_sse_double(double v, cl_object stream)
{
if (is_all_FF(&v, sizeof(double)))
write_str(" TRUE", stream);
else {
char buf[60];
sprintf(buf, " %lg", v);
write_str(buf, stream);
}
}
static void
write_sse_pack(cl_object x, cl_object stream)
{
int i;
switch (x->sse.elttype) {
case aet_sf:
for (i = 0; i < 4; i++)
write_sse_float(x->sse.data.sf[i], stream);
break;
case aet_df:
write_sse_double(x->sse.data.df[0], stream);
write_sse_double(x->sse.data.df[1], stream);
break;
default:
for (i = 0; i < 16; i++) {
char buf[10];
int pad = 1 + (i%4 == 0);
sprintf(buf, "%*c%02x", pad, ' ', x->sse.data.b8[i]);
write_str(buf, stream);
}
break;
}
}
#endif
cl_object
si_write_ugly_object(cl_object x, cl_object stream)
{
@ -1619,6 +1679,14 @@ si_write_ugly_object(cl_object x, cl_object stream)
write_addr(x, stream);
write_ch('>', stream);
break;
#endif
#ifdef ECL_SSE2
case t_sse_pack:
if (ecl_print_readably()) FEprint_not_readable(x);
write_str("#<SSE", stream);
write_sse_pack(x, stream);
write_ch('>', stream);
break;
#endif
default:
if (ecl_print_readably()) FEprint_not_readable(x);

View file

@ -83,6 +83,9 @@ static cl_index object_size[] = {
ROUNDED_SIZE(ecl_foreign), /* t_foreign */
ROUNDED_SIZE(ecl_frame), /* t_frame */
ROUNDED_SIZE(ecl_weak_pointer) /* t_weak_pointer */
#ifdef ECL_SSE2
, ROUNDED_SIZE(ecl_sse_pack) /* t_sse_pack */
#endif
};
typedef struct pool {

149
src/c/sse2.d Normal file
View file

@ -0,0 +1,149 @@
/* -*- mode: c; c-basic-offset: 8 -*- */
/*
sse2.c -- SSE2 vector type support
*/
/*
Copyright (c) 1984, Taiichi Yuasa and Masami Hagiya.
Copyright (c) 1990, Giuseppe Attardi.
Copyright (c) 2001, Juan Jose Garcia Ripoll.
ECL is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
See file '../Copyright' for full details.
*/
#include <limits.h>
#include <string.h>
#include <ecl/ecl.h>
#define ECL_DEFINE_AET_SIZE
#include <ecl/internal.h>
#ifdef ECL_SSE2
/* Predicates */
cl_object
si_sse_pack_p(cl_object x)
{
@(return (ECL_SSE_PACK_P(x) ? Ct : Cnil))
}
cl_object
si_int_sse_pack_p(cl_object x)
{
@(return (ECL_SSE_PACK_P(x) && x->sse.elttype != aet_sf && x->sse.elttype != aet_df ? Ct : Cnil))
}
cl_object
si_float_sse_pack_p(cl_object x)
{
@(return (ECL_SSE_PACK_P(x) && x->sse.elttype == aet_sf ? Ct : Cnil))
}
cl_object
si_double_sse_pack_p(cl_object x)
{
@(return (ECL_SSE_PACK_P(x) && x->sse.elttype == aet_df ? Ct : Cnil))
}
/* Element type substitution */
static void verify_sse_elttype(cl_elttype eltt) {
switch (eltt) {
case aet_sf:
case aet_df:
case aet_b8:
case aet_i8:
#ifdef ecl_uint16_t
case aet_b16:
case aet_i16:
#endif
#ifdef ecl_uint32_t
case aet_b32:
case aet_i32:
#endif
#ifdef ecl_uint64_t
case aet_b64:
case aet_i64:
#endif
break; /* OK */
default:
FEerror("Invalid element type for an SSE pack: ~S", 1, ecl_elttype_to_symbol(eltt));
}
}
static
cl_elttype symbol_to_sse_elttype(cl_object type) {
cl_elttype eltt = ecl_symbol_to_elttype(type);
verify_sse_elttype(eltt);
return eltt;
}
cl_object
si_sse_pack_as_elt_type(cl_object x, cl_object type)
{
cl_elttype rtype;
if (ecl_unlikely(!ECL_SSE_PACK_P(x))) {
FEwrong_type_nth_arg(@[ext::sse-pack-as-elt-type], 1, x, @[ext::sse-pack]);
}
rtype = symbol_to_sse_elttype(type);
if (x->sse.elttype != rtype) {
cl_object new = ecl_alloc_object(t_sse_pack);
new->sse.elttype = rtype;
new->sse.data.vi = x->sse.data.vi;
x = new;
}
@(return x)
}
/* Conversion to and from specialized vectors */
cl_object
si_sse_pack_to_vector(cl_object x, cl_object elt_type)
{
cl_elttype etype;
cl_object vec;
if (ecl_unlikely(!ECL_SSE_PACK_P(x))) {
FEwrong_type_nth_arg(@[ext::sse-pack-to-vector], 1, x, @[ext::sse-pack]);
}
etype = x->sse.elttype;
if (elt_type != Cnil)
etype = symbol_to_sse_elttype(elt_type);
vec = ecl_alloc_simple_vector(16/ecl_aet_size[etype], etype);
memcpy(vec->vector.self.b8, x->sse.data.b8, 16);
@(return vec)
}
cl_object
si_vector_to_sse_pack(cl_object x)
{
cl_object ssev;
if (ecl_unlikely(!ECL_ARRAYP(x))) {
FEwrong_type_nth_arg(@[ext::vector-to-sse-pack], 1, x, @[array]);
}
verify_sse_elttype(x->vector.elttype);
if (ecl_unlikely(x->vector.dim * ecl_aet_size[x->vector.elttype] != 16))
FEerror("Wrong vector size in VECTOR-TO-SSE-PACK: ~S",1,MAKE_FIXNUM(x->vector.dim));
ssev = ecl_alloc_object(t_sse_pack);
ssev->sse.elttype = x->vector.elttype;
memcpy(ssev->sse.data.b8, x->vector.self.b8, 16);
@(return ssev)
}
#endif // ECL_SSE2

View file

@ -1928,5 +1928,19 @@ cl_symbols[] = {
{SYS_ "SERIALIZE", SI_ORDINARY, si_serialize, 1, OBJNULL},
{SYS_ "DESERIALIZE", SI_ORDINARY, si_deserialize, 1, OBJNULL},
#ifdef ECL_SSE2
{EXT_ "SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL},
{EXT_ "SSE-PACK-P", EXT_ORDINARY, si_sse_pack_p, 1, OBJNULL},
{EXT_ "SSE-PACK-AS-ELT-TYPE", EXT_ORDINARY, si_sse_pack_as_elt_type, 2, OBJNULL},
{EXT_ "VECTOR-TO-SSE-PACK", EXT_ORDINARY, si_vector_to_sse_pack, 1, OBJNULL},
{EXT_ "SSE-PACK-TO-VECTOR", EXT_ORDINARY, si_sse_pack_to_vector, 2, OBJNULL},
{EXT_ "INT-SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL},
{EXT_ "INT-SSE-PACK-P", EXT_ORDINARY, si_int_sse_pack_p, 1, OBJNULL},
{EXT_ "FLOAT-SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL},
{EXT_ "FLOAT-SSE-PACK-P", EXT_ORDINARY, si_float_sse_pack_p, 1, OBJNULL},
{EXT_ "DOUBLE-SSE-PACK", EXT_ORDINARY, NULL, -1, OBJNULL},
{EXT_ "DOUBLE-SSE-PACK-P", EXT_ORDINARY, si_double_sse_pack_p, 1, OBJNULL},
#endif
/* Tag for end of list */
{NULL, CL_ORDINARY, NULL, -1, OBJNULL}};

View file

@ -1928,5 +1928,19 @@ cl_symbols[] = {
{SYS_ "SERIALIZE","si_serialize"},
{SYS_ "DESERIALIZE","si_deserialize"},
#ifdef ECL_SSE2
{EXT_ "SSE-PACK",NULL},
{EXT_ "SSE-PACK-P","si_sse_pack_p"},
{EXT_ "SSE-PACK-AS-ELT-TYPE","si_sse_pack_as_elt_type"},
{EXT_ "VECTOR-TO-SSE-PACK","si_vector_to_sse_pack"},
{EXT_ "SSE-PACK-TO-VECTOR","si_sse_pack_to_vector"},
{EXT_ "INT-SSE-PACK",NULL},
{EXT_ "INT-SSE-PACK-P","si_int_sse_pack_p"},
{EXT_ "FLOAT-SSE-PACK",NULL},
{EXT_ "FLOAT-SSE-PACK-P","si_float_sse_pack_p"},
{EXT_ "DOUBLE-SSE-PACK",NULL},
{EXT_ "DOUBLE-SSE-PACK-P","si_double_sse_pack_p"},
#endif
/* Tag for end of list */
{NULL,NULL}};

View file

@ -162,6 +162,10 @@ ecl_type_to_symbol(cl_type t)
return @'si::frame';
case t_weak_pointer:
return @'ext::weak-pointer';
#ifdef ECL_SSE2
case t_sse_pack:
return @'ext::sse-pack';
#endif
default:
ecl_internal_error("not a lisp data object");
}
@ -322,6 +326,15 @@ cl_type_of(cl_object x)
case t_list:
t = Null(x) ? @'null' : @'cons';
break;
#ifdef ECL_SSE2
case t_sse_pack:
switch (x->sse.elttype) {
case aet_sf: t = @'ext::float-sse-pack'; break;
case aet_df: t = @'ext::double-sse-pack'; break;
default: t = @'ext::int-sse-pack'; break;
}
break;
#endif
default:
t = ecl_type_to_symbol(tx);
}

View file

@ -86,7 +86,8 @@
#+threads (mp::process)
#+threads (mp::lock)
#+threads (mp::condition-variable)
#+semaphores (mp::semaphore))))
#+semaphores (mp::semaphore)
#+sse2 (ext::sse-pack))))
(loop for (name . rest) in '#.+builtin-classes+
with index = 1

View file

@ -253,6 +253,9 @@ typedef unsigned @CL_FIXNUM_TYPE@ cl_hashkey;
# endif
#endif
#if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
#define ECL_SSE2
#endif
/* -CUT-: Everything below this mark will not be installed */
/* -------------------------------------------------------------------- *

View file

@ -72,6 +72,11 @@
# endif /* ECL_THREADS */
#endif /* _MSC_VER || __MINGW32__ */
#ifdef ECL_SSE2
#include <xmmintrin.h>
#include <emmintrin.h>
#endif
#include <ecl/object.h>
#include <ecl/external.h>
#include <ecl/stacks.h>

View file

@ -1503,6 +1503,19 @@ extern ECL_API cl_fixnum ecl_length(cl_object x);
extern ECL_API cl_object ecl_subseq(cl_object seq, cl_index start, cl_index limit);
extern ECL_API cl_object ecl_copy_seq(cl_object seq);
#ifdef ECL_SSE2
/* sse2.c */
extern ECL_API cl_object si_sse_pack_p(cl_object x);
extern ECL_API cl_object si_int_sse_pack_p(cl_object x);
extern ECL_API cl_object si_float_sse_pack_p(cl_object x);
extern ECL_API cl_object si_double_sse_pack_p(cl_object x);
extern ECL_API cl_object si_sse_pack_as_elt_type(cl_object x, cl_object type);
extern ECL_API cl_object si_vector_to_sse_pack(cl_object x);
extern ECL_API cl_object si_sse_pack_to_vector(cl_object x, cl_object elt_type);
#endif
/* stacks.c */

View file

@ -87,6 +87,9 @@ typedef enum {
t_foreign,
t_frame,
t_weak_pointer,
#ifdef ECL_SSE2
t_sse_pack,
#endif
t_end,
t_other,
t_contiguous, /* contiguous block */
@ -179,6 +182,9 @@ typedef cl_object (*cl_objectfn_fixed)();
#define ECL_PATHNAMEP(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_pathname))
#define ECL_READTABLEP(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_readtable))
#define ECL_FOREIGN_DATA_P(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_foreign))
#ifdef ECL_SSE2
#define ECL_SSE_PACK_P(x) ((IMMEDIATE(x) == 0) && ((x)->d.t == t_sse_pack))
#endif
#define HEADER int8_t t, m, padding1, padding2
#define HEADER1(field) int8_t t, m, field, padding
@ -939,6 +945,36 @@ struct ecl_instance { /* instance header */
};
#endif /* CLOS */
#ifdef ECL_SSE2
union ecl_sse_data {
__m128 vf;
__m128i vi;
__m128d vd;
uint8_t b8[16];
int8_t i8[16];
#ifdef ecl_uint16_t
ecl_uint16_t b16[8];
ecl_int16_t i16[8];
#endif
#ifdef ecl_uint32_t
ecl_uint32_t b32[4];
ecl_int32_t i32[4];
#endif
#ifdef ecl_uint64_t
ecl_uint64_t b64[2];
ecl_int64_t i64[2];
#endif
float sf[4];
double df[2];
};
struct ecl_sse_pack {
HEADER1(elttype);
union ecl_sse_data data;
};
#endif
/*
Definition of lispunion.
*/
@ -991,6 +1027,9 @@ union cl_lispunion {
struct ecl_foreign foreign; /* user defined data type */
struct ecl_stack_frame frame; /* stack frame */
struct ecl_weak_pointer weak; /* weak pointers */
#ifdef ECL_SSE2
struct ecl_sse_pack sse;
#endif
};
/*

View file

@ -342,6 +342,10 @@ and is not adjustable."
(STRING . STRINGP)
(STRUCTURE . SYS:STRUCTUREP)
(SYMBOL . SYMBOLP)
#+sse2 (EXT:SSE-PACK . EXT:SSE-PACK-P)
#+sse2 (EXT:INT-SSE-PACK . EXT:INT-SSE-PACK-P)
#+sse2 (EXT:FLOAT-SSE-PACK . EXT:FLOAT-SSE-PACK-P)
#+sse2 (EXT:DOUBLE-SSE-PACK . EXT:DOUBLE-SSE-PACK-P)
(T . CONSTANTLY-T)
(VECTOR . VECTORP))))
@ -650,6 +654,14 @@ if not possible."
(FUNCTION (coerce-to-function object))
((VECTOR SIMPLE-VECTOR #+unicode SIMPLE-BASE-STRING SIMPLE-STRING #+unicode BASE-STRING STRING BIT-VECTOR SIMPLE-BIT-VECTOR)
(concatenate type object))
#+sse2
((EXT:INT-SSE-PACK EXT:FLOAT-SSE-PACK EXT:DOUBLE-SSE-PACK)
(if (ext:sse-pack-p object)
(ext:sse-pack-as-elt-type object (case type
(EXT:INT-SSE-PACK '(unsigned-byte 8))
(EXT:FLOAT-SSE-PACK 'single-float)
(EXT:DOUBLE-SSE-PACK 'double-float)))
(error-coerce object type)))
(t
(if (or (listp object) (vector object))
(concatenate type object)
@ -1219,6 +1231,12 @@ if not possible."
#+threads (MP::PROCESS)
#+threads (MP::LOCK)
#+ffi (FOREIGN-DATA)
#+sse2 (EXT:SSE-PACK (OR EXT:INT-SSE-PACK
EXT:FLOAT-SSE-PACK
EXT:DOUBLE-SSE-PACK))
#+sse2 (EXT:INT-SSE-PACK)
#+sse2 (EXT:FLOAT-SSE-PACK)
#+sse2 (EXT:DOUBLE-SSE-PACK)
(CODE-BLOCK)
))