diff --git a/src/cmp/cmpc-inliner.lsp b/src/cmp/cmpc-inliner.lsp
index d113a05c7..b8d759e1d 100644
--- a/src/cmp/cmpc-inliner.lsp
+++ b/src/cmp/cmpc-inliner.lsp
@@ -147,6 +147,11 @@
                    (return-from inline-type-matches nil))
                  (push new-type rts)
                  (setq number-max (maximum-float-type number-max new-type))))
+              #+sse2
+              ;; Allow implicit casts between SSE subtypes to kick in
+              ((and (type>= 'ext:sse-pack type)
+                    (type>= 'ext:sse-pack arg-type))
+               (push type rts))
               ((type>= type arg-type)
                (push type rts))
               (t (return-from inline-type-matches nil)))))
diff --git a/src/cmp/cmpffi.lsp b/src/cmp/cmpffi.lsp
index 3c27f8a0a..3ca9ca69c 100644
--- a/src/cmp/cmpffi.lsp
+++ b/src/cmp/cmpffi.lsp
@@ -355,7 +355,20 @@
         ((:int-sse-pack :float-sse-pack :double-sse-pack)
          (case loc-rep-type
            ((:object)
-            (wt-from-object-conversion dest-type loc-type dest-rep-type loc))
+            (wt-from-object-conversion 'ext:sse-pack loc-type dest-rep-type loc))
+           ;; Implicitly cast between SSE subtypes
+           ((:int-sse-pack :float-sse-pack :double-sse-pack)
+            (wt (ecase dest-rep-type
+                  (:int-sse-pack (ecase loc-rep-type
+                                   (:float-sse-pack "_mm_castps_si128")
+                                   (:double-sse-pack "_mm_castpd_si128")))
+                  (:float-sse-pack (ecase loc-rep-type
+                                     (:int-sse-pack "_mm_castsi128_ps")
+                                     (:double-sse-pack "_mm_castpd_ps")))
+                  (:double-sse-pack (ecase loc-rep-type
+                                      (:int-sse-pack "_mm_castsi128_pd")
+                                      (:float-sse-pack "_mm_castps_pd"))))
+                "(" loc ")"))
            (otherwise
             (coercion-error))))
         (t
diff --git a/src/h/ecl-inl.h b/src/h/ecl-inl.h
index c4a01079a..7f508986a 100644
--- a/src/h/ecl-inl.h
+++ b/src/h/ecl-inl.h
@@ -54,6 +54,9 @@
   } else do {
 #define end_loop_for_on(list) } while (list = ECL_CONS_CDR(list), ECL_CONSP(list))
 
+/*
+ * Static constant definition.
+ */
 #ifdef __cplusplus
 #define ecl_cast_ptr(type,n) reinterpret_cast<type>((void*)n)
 #else
@@ -117,10 +120,53 @@
         static const cl_object name = (cl_object)(& name ## data)
 
 #ifdef ECL_SSE2
+
+/*
+ * Static SSE constant.
+ */
+
 #define ecl_def_ct_sse_pack(name,type,v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15) \
         static const struct ecl_sse_pack name ## data = { \
                 (int8_t)t_sse_pack, 0, (type), 0, \
                 {{v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15}} \
         }; \
         static const cl_object name = (cl_object)(& name ## data)
+
+/*
+ * Missing SSE intrinsics: casts among __m128, __m128d and __m128i for old Intel (< 810), MSVC (< 1500) and GCC (< 4).
+ */
+
+#if (defined(__INTEL_COMPILER) ? __INTEL_COMPILER < 810 : defined(_MSC_VER) && (_MSC_VER < 1500))
+
+__forceinline __m128i _mm_castps_si128(__m128 a) { union { __m128 f; __m128i i;} c; c.f = a; return c.i; }
+__forceinline __m128 _mm_castsi128_ps(__m128i a) { union { __m128 f; __m128i i;} c; c.i = a; return c.f; }
+__forceinline __m128i _mm_castpd_si128(__m128d a) { union { __m128d d; __m128i i;} c; c.d = a; return c.i; }
+__forceinline __m128d _mm_castsi128_pd(__m128i a) { union { __m128d d; __m128i i;} c; c.i = a; return c.d; }
+__forceinline __m128d _mm_castps_pd(__m128 a) { union { __m128d d; __m128 f;} c; c.f = a; return c.d; }
+__forceinline __m128 _mm_castpd_ps(__m128d a) { union { __m128d d; __m128 f;} c; c.d = a; return c.f; }
+
+#elif defined(__GNUC__) && (__GNUC__ < 4) && !defined(__INTEL_COMPILER)
+
+/* Copied from GCC 4 headers: */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_ps(__m128d __A) { return (__m128) __A; }
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_si128(__m128d __A) { return (__m128i) __A; }
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_pd(__m128 __A) { return (__m128d) __A; }
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_si128(__m128 __A) { return (__m128i) __A; }
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_ps(__m128i __A) { return (__m128) __A; }
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_pd(__m128i __A) { return (__m128d) __A; }
+
 #endif
+
+#endif /* ECL_SSE2 */
+