Enable automatic coercion of SSE subtypes via _mm_cast* intrinsics.

Assume that any sse-pack subtype matches any other one in the inline
expansion matcher, and insert casts where required.

This uses intrinsics that are present only in recent compiler
versions, so fallback inline definitions are added to a header file.
This commit is contained in:
Alexander Gavrilov 2010-08-27 17:59:44 +04:00 committed by Juan Jose Garcia Ripoll
parent 2f4723012e
commit 8f835233d6
3 changed files with 65 additions and 1 deletions

View file

@ -147,6 +147,11 @@
(return-from inline-type-matches nil))
(push new-type rts)
(setq number-max (maximum-float-type number-max new-type))))
#+sse2
;; Allow implicit casts between SSE subtypes to kick in
((and (type>= 'ext:sse-pack type)
(type>= 'ext:sse-pack arg-type))
(push type rts))
((type>= type arg-type)
(push type rts))
(t (return-from inline-type-matches nil)))))

View file

@ -355,7 +355,20 @@
((:int-sse-pack :float-sse-pack :double-sse-pack)
(case loc-rep-type
((:object)
(wt-from-object-conversion dest-type loc-type dest-rep-type loc))
(wt-from-object-conversion 'ext:sse-pack loc-type dest-rep-type loc))
;; Implicitly cast between SSE subtypes
((:int-sse-pack :float-sse-pack :double-sse-pack)
(wt (ecase dest-rep-type
(:int-sse-pack (ecase loc-rep-type
(:float-sse-pack "_mm_castps_si128")
(:double-sse-pack "_mm_castpd_si128")))
(:float-sse-pack (ecase loc-rep-type
(:int-sse-pack "_mm_castsi128_ps")
(:double-sse-pack "_mm_castpd_ps")))
(:double-sse-pack (ecase loc-rep-type
(:int-sse-pack "_mm_castsi128_pd")
(:float-sse-pack "_mm_castps_pd"))))
"(" loc ")"))
(otherwise
(coercion-error))))
(t

View file

@ -54,6 +54,9 @@
} else do {
#define end_loop_for_on(list) } while (list = ECL_CONS_CDR(list), ECL_CONSP(list))
/*
* Static constant definition.
*/
#ifdef __cplusplus
#define ecl_cast_ptr(type,n) reinterpret_cast<type>((void*)n)
#else
@ -117,10 +120,53 @@
static const cl_object name = (cl_object)(& name ## data)
#ifdef ECL_SSE2
/*
 * Static SSE constant.
 *
 * ecl_def_ct_sse_pack(name, type, v0, ..., v15) expands to two file-scope
 * declarations:
 *   - a `static const struct ecl_sse_pack` object called `name ## data`,
 *     initialized positionally with the t_sse_pack tag (cast to int8_t),
 *     a zero, the `type` argument, another zero, and then the sixteen
 *     values v0..v15 in a nested brace initializer;
 *   - a `static const cl_object` called `name` holding the address of
 *     that object.
 * The expansion does not end with a semicolon; the user of the macro
 * supplies it.
 *
 * NOTE(review): the initializer is positional, so it must stay in step
 * with the field order of struct ecl_sse_pack (declared elsewhere).
 * v0..v15 are presumably the 16 bytes of the 128-bit pack -- confirm
 * against the struct definition.
 */
#define ecl_def_ct_sse_pack(name,type,v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15) \
static const struct ecl_sse_pack name ## data = { \
(int8_t)t_sse_pack, 0, (type), 0, \
{{v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15}} \
}; \
static const cl_object name = (cl_object)(& name ## data)
/*
 * Missing SSE intrinsics
 *
 * The _mm_cast* functions reinterpret the contents of one SSE vector type
 * (__m128, __m128d, __m128i) as another one without converting the
 * values. The compilers selected by the checks below are assumed not to
 * ship them, so equivalent inline definitions are provided here.
 */
#if (defined(__INTEL_COMPILER) ? __INTEL_COMPILER < 810 : defined(_MSC_VER) && (_MSC_VER < 1500))
/*
 * Intel compiler older than 810, or a non-Intel compiler reporting
 * _MSC_VER < 1500: copy the value through a union of the two vector types.
 */
__forceinline __m128i _mm_castps_si128(__m128 a) { union { __m128 f; __m128i i;} c; c.f = a; return c.i; }
__forceinline __m128 _mm_castsi128_ps(__m128i a) { union { __m128 f; __m128i i;} c; c.i = a; return c.f; }
__forceinline __m128i _mm_castpd_si128(__m128d a) { union { __m128d d; __m128i i;} c; c.d = a; return c.i; }
__forceinline __m128d _mm_castsi128_pd(__m128i a) { union { __m128d d; __m128i i;} c; c.i = a; return c.d; }
__forceinline __m128d _mm_castps_pd(__m128 a) { union { __m128d d; __m128 f;} c; c.f = a; return c.d; }
__forceinline __m128 _mm_castpd_ps(__m128d a) { union { __m128d d; __m128 f;} c; c.d = a; return c.f; }
#elif defined(__GNUC__) && (__GNUC__ < 4) && !defined(__INTEL_COMPILER)
/*
 * GCC older than 4.0: the function bodies are the plain vector casts used
 * by the GCC 4 headers. The declarations use `static __inline` with only
 * the always_inline attribute, because the gnu_inline and artificial
 * attributes found in newer GCC headers are not recognised by GCC 3.x
 * and would trigger "attribute directive ignored" warnings.
 */
static __inline __m128 __attribute__((__always_inline__))
_mm_castpd_ps(__m128d __A) { return (__m128) __A; }
static __inline __m128i __attribute__((__always_inline__))
_mm_castpd_si128(__m128d __A) { return (__m128i) __A; }
static __inline __m128d __attribute__((__always_inline__))
_mm_castps_pd(__m128 __A) { return (__m128d) __A; }
static __inline __m128i __attribute__((__always_inline__))
_mm_castps_si128(__m128 __A) { return (__m128i) __A; }
static __inline __m128 __attribute__((__always_inline__))
_mm_castsi128_ps(__m128i __A) { return (__m128) __A; }
static __inline __m128d __attribute__((__always_inline__))
_mm_castsi128_pd(__m128i __A) { return (__m128d) __A; }
#endif
#endif /* ECL_SSE2 */