From 8f835233d650ac0acfc9ed6a199c22b050a65233 Mon Sep 17 00:00:00 2001 From: Alexander Gavrilov Date: Fri, 27 Aug 2010 17:59:44 +0400 Subject: [PATCH] Enable automatic coercion of SSE subtypes via _mm_cast* intrinsics. Assume that any sse-pack subtype matches any other one in inline expansion matcher, and insert casts when required. This uses intrinsics that are present only in the recent versions of compilers, so add fallback inline definitions to a header file. --- src/cmp/cmpc-inliner.lsp | 5 +++++ src/cmp/cmpffi.lsp | 15 ++++++++++++- src/h/ecl-inl.h | 46 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/src/cmp/cmpc-inliner.lsp b/src/cmp/cmpc-inliner.lsp index d113a05c7..b8d759e1d 100644 --- a/src/cmp/cmpc-inliner.lsp +++ b/src/cmp/cmpc-inliner.lsp @@ -147,6 +147,11 @@ (return-from inline-type-matches nil)) (push new-type rts) (setq number-max (maximum-float-type number-max new-type)))) + #+sse2 + ;; Allow implicit casts between SSE subtypes to kick in + ((and (type>= 'ext:sse-pack type) + (type>= 'ext:sse-pack arg-type)) + (push type rts)) ((type>= type arg-type) (push type rts)) (t (return-from inline-type-matches nil))))) diff --git a/src/cmp/cmpffi.lsp b/src/cmp/cmpffi.lsp index 3c27f8a0a..3ca9ca69c 100644 --- a/src/cmp/cmpffi.lsp +++ b/src/cmp/cmpffi.lsp @@ -355,7 +355,20 @@ ((:int-sse-pack :float-sse-pack :double-sse-pack) (case loc-rep-type ((:object) - (wt-from-object-conversion dest-type loc-type dest-rep-type loc)) + (wt-from-object-conversion 'ext:sse-pack loc-type dest-rep-type loc)) + ;; Implicitly cast between SSE subtypes + ((:int-sse-pack :float-sse-pack :double-sse-pack) + (wt (ecase dest-rep-type + (:int-sse-pack (ecase loc-rep-type + (:float-sse-pack "_mm_castps_si128") + (:double-sse-pack "_mm_castpd_si128"))) + (:float-sse-pack (ecase loc-rep-type + (:int-sse-pack "_mm_castsi128_ps") + (:double-sse-pack "_mm_castpd_ps"))) + (:double-sse-pack (ecase loc-rep-type + (:int-sse-pack 
"_mm_castsi128_pd") + (:float-sse-pack "_mm_castps_pd")))) + "(" loc ")")) (otherwise (coercion-error)))) (t diff --git a/src/h/ecl-inl.h b/src/h/ecl-inl.h index c4a01079a..7f508986a 100644 --- a/src/h/ecl-inl.h +++ b/src/h/ecl-inl.h @@ -54,6 +54,9 @@ } else do { #define end_loop_for_on(list) } while (list = ECL_CONS_CDR(list), ECL_CONSP(list)) +/* + * Static constant definition. + */ #ifdef __cplusplus #define ecl_cast_ptr(type,n) reinterpret_cast<type>((void*)n) #else @@ -117,10 +120,53 @@ static const cl_object name = (cl_object)(& name ## data) #ifdef ECL_SSE2 + +/* + * Static SSE constant. + */ + #define ecl_def_ct_sse_pack(name,type,v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15) \ static const struct ecl_sse_pack name ## data = { \ (int8_t)t_sse_pack, 0, (type), 0, \ {{v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15}} \ }; \ static const cl_object name = (cl_object)(& name ## data) + +/* + * Missing SSE intrinsics + */ + +#if (defined(__INTEL_COMPILER) ? __INTEL_COMPILER < 810 : defined(_MSC_VER) && (_MSC_VER < 1500)) + +__forceinline __m128i _mm_castps_si128(__m128 a) { union { __m128 f; __m128i i;} c; c.f = a; return c.i; } +__forceinline __m128 _mm_castsi128_ps(__m128i a) { union { __m128 f; __m128i i;} c; c.i = a; return c.f; } +__forceinline __m128i _mm_castpd_si128(__m128d a) { union { __m128d d; __m128i i;} c; c.d = a; return c.i; } +__forceinline __m128d _mm_castsi128_pd(__m128i a) { union { __m128d d; __m128i i;} c; c.i = a; return c.d; } +__forceinline __m128d _mm_castps_pd(__m128 a) { union { __m128d d; __m128 f;} c; c.f = a; return c.d; } +__forceinline __m128 _mm_castpd_ps(__m128d a) { union { __m128d d; __m128 f;} c; c.d = a; return c.f; } + +#elif defined(__GNUC__) && (__GNUC__ < 4) && !defined(__INTEL_COMPILER) + +// Copied from GCC 4 headers: +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castpd_ps(__m128d __A) { return (__m128) __A; } + +extern __inline __m128i 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castpd_si128(__m128d __A) { return (__m128i) __A; } + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castps_pd(__m128 __A) { return (__m128d) __A; } + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castps_si128(__m128 __A) { return (__m128i) __A; } + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castsi128_ps(__m128i __A) { return (__m128) __A; } + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castsi128_pd(__m128i __A) { return (__m128d) __A; } + #endif + +#endif /* ECL_SSE2 */ +