mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2025-12-06 02:40:26 -08:00
721 lines
34 KiB
Common Lisp
721 lines
34 KiB
Common Lisp
;;; -*- mode: Lisp; indent-tabs-mode: nil; -*-
|
|
;;;
|
|
;;; Copyright (c) 2010, Alexander Gavrilov (angavrilov@gmail.com)
|
|
;;;
|
|
;;; This file contains definitions for all SSE intrinsics.
|
|
;;;
|
|
;;; The macros are defined in the *-core.lisp files.
|
|
;;; On SBCL wrapping functions are defined by sbcl-functions.lisp.
|
|
;;;
|
|
|
|
(in-package #:SSE)
|
|
|
|
#+(and ecl (or ecl_min stage1 cross))
|
|
(eval-when (:compile-toplevel)
|
|
;; During the initial bootstrap sequence when the contribs are
|
|
;; compiled, the system does not load fasls after building them.
|
|
;; (For all it knows, it might be cross-compiling to another architecture.)
|
|
;; Work around by loading the macro definition file into the interpreter:
|
|
(load (merge-pathnames #P"ecl-sse-core.lisp" *compile-file-truename*)))
|
|
|
|
;;; Prefetch
|
|
|
|
(def-load-intrinsic cpu-prefetch-t0 nil prefetch "_mm_prefetch" :tags (:t0) :size :byte :postfix-fmt ",_MM_HINT_T0")
|
|
(def-load-intrinsic cpu-prefetch-t1 nil prefetch "_mm_prefetch" :tags (:t1) :size :byte :postfix-fmt ",_MM_HINT_T1")
|
|
(def-load-intrinsic cpu-prefetch-t2 nil prefetch "_mm_prefetch" :tags (:t2) :size :byte :postfix-fmt ",_MM_HINT_T2")
|
|
(def-load-intrinsic cpu-prefetch-nta nil prefetch "_mm_prefetch" :tags (:nta) :size :byte :postfix-fmt ",_MM_HINT_NTA")
|
|
|
|
(def-load-intrinsic cpu-clflush nil clflush "_mm_clflush" :size :byte)
|
|
|
|
;;; CPU control
|
|
|
|
#+sbcl
|
|
(progn
|
|
(defknown cpu-mxcsr () (unsigned-byte 32) (flushable))
|
|
|
|
(define-vop (cpu-mxcsr)
|
|
(:translate cpu-mxcsr)
|
|
(:args) (:arg-types)
|
|
(:results (result :scs (unsigned-reg)))
|
|
(:result-types unsigned-num)
|
|
(:temporary (:sc unsigned-stack) tmp)
|
|
(:policy :fast-safe)
|
|
(:generator 3
|
|
(let ((ea (make-ea :dword :base rbp-tn
|
|
:disp (frame-byte-offset (tn-offset tmp)))))
|
|
(inst stmxcsr ea)
|
|
(inst mov (reg-in-size result :dword) ea))))
|
|
|
|
(defknown %set-cpu-mxcsr ((unsigned-byte 32)) (unsigned-byte 32) (unsafe))
|
|
|
|
(define-vop (%set-cpu-mxcsr)
|
|
(:translate %set-cpu-mxcsr)
|
|
(:args (value :scs (unsigned-reg unsigned-stack) :target result))
|
|
(:arg-types unsigned-num)
|
|
(:results (result :scs (unsigned-reg)
|
|
:load-if (not (and (sc-is result unsigned-stack)
|
|
(or (sc-is value unsigned-reg)
|
|
(location= value result))))))
|
|
(:result-types unsigned-num)
|
|
(:temporary (:sc unsigned-stack) tmp)
|
|
(:policy :fast-safe)
|
|
(:generator 3
|
|
(cond ((sc-is value unsigned-stack)
|
|
(setf tmp value))
|
|
((sc-is result unsigned-stack)
|
|
(setf tmp result)))
|
|
(move tmp value)
|
|
(unless (location= result tmp)
|
|
(move result value))
|
|
(let ((ea (make-ea :dword :base rbp-tn
|
|
:disp (frame-byte-offset (tn-offset tmp)))))
|
|
(inst ldmxcsr ea))))
|
|
|
|
(macrolet ((defvoid (name insn)
|
|
`(progn
|
|
(export ',name)
|
|
(defknown ,name () (values) ())
|
|
(define-vop (,name)
|
|
(:translate ,name)
|
|
(:policy :fast-safe)
|
|
(:generator 1
|
|
(inst ,insn))))))
|
|
(defvoid cpu-load-fence lfence)
|
|
(defvoid cpu-store-fence sfence)
|
|
(defvoid cpu-memory-fence mfence)
|
|
(defvoid cpu-pause pause)))
|
|
|
|
#+ecl
|
|
(progn
|
|
(def-intrinsic cpu-mxcsr () fixnum "_mm_getcsr")
|
|
(def-intrinsic %set-cpu-mxcsr (fixnum) fixnum "_mm_setcsr" :export nil :ret-arg 0)
|
|
|
|
(defsetf cpu-mxcsr %set-cpu-mxcsr)
|
|
|
|
(def-intrinsic cpu-load-fence () nil "_mm_lfence")
|
|
(def-intrinsic cpu-store-fence () nil "_mm_sfence")
|
|
(def-intrinsic cpu-memory-fence () nil "_mm_mfence")
|
|
|
|
(def-intrinsic cpu-pause () nil "_mm_pause"))
|
|
|
|
;;; Single-float
|
|
|
|
;; Initialization
|
|
|
|
#+sbcl
|
|
(def-float-set-intrinsic set-ss %set-ss single-float single-reg float-sse-pack movaps)
|
|
|
|
#+ecl
|
|
(progn
|
|
(def-intrinsic set-ss (single-float) float-sse-pack "_mm_set_ss")
|
|
(def-intrinsic set1-ps (single-float) float-sse-pack "_mm_set1_ps")
|
|
|
|
(def-intrinsic set-ps (single-float single-float single-float single-float) float-sse-pack "_mm_set_ps")
|
|
(def-intrinsic setr-ps (single-float single-float single-float single-float) float-sse-pack "_mm_setr_ps")
|
|
|
|
(def-intrinsic setzero-ps () float-sse-pack "_mm_setzero_ps"))
|
|
|
|
;; Memory
|
|
|
|
(def-load-intrinsic mem-ref-ss float-sse-pack movss "_mm_load_ss")
|
|
|
|
(def-load-intrinsic mem-ref-ps float-sse-pack movups "_mm_loadu_ps")
|
|
(def-load-intrinsic mem-ref-aps float-sse-pack movaps "_mm_load_ps")
|
|
|
|
(def-store-intrinsic mem-set-ss float-sse-pack movss "_mm_store_ss" :setf-name mem-ref-ss)
|
|
|
|
(def-store-intrinsic mem-set-ps float-sse-pack movups "_mm_storeu_ps" :setf-name mem-ref-ps)
|
|
(def-store-intrinsic mem-set-aps float-sse-pack movaps "_mm_store_ps" :setf-name mem-ref-aps)
|
|
|
|
(def-store-intrinsic stream-ps float-sse-pack movntps "_mm_stream_ps")
|
|
|
|
;; Arithmetics
|
|
|
|
(def-binary-intrinsic add-ss float-sse-pack addss 3 "_mm_add_ss")
|
|
(def-binary-intrinsic add-ps float-sse-pack addps 3 "_mm_add_ps" :commutative t)
|
|
(def-binary-intrinsic sub-ss float-sse-pack subss 3 "_mm_sub_ss")
|
|
(def-binary-intrinsic sub-ps float-sse-pack subps 3 "_mm_sub_ps")
|
|
(def-binary-intrinsic mul-ss float-sse-pack mulss 5 "_mm_mul_ss")
|
|
(def-binary-intrinsic mul-ps float-sse-pack mulps 5 "_mm_mul_ps" :commutative t)
|
|
(def-binary-intrinsic div-ss float-sse-pack divss 13 "_mm_div_ss")
|
|
(def-binary-intrinsic div-ps float-sse-pack divps 13 "_mm_div_ps")
|
|
(def-binary-intrinsic min-ss float-sse-pack minss 3 "_mm_min_ss")
|
|
(def-binary-intrinsic min-ps float-sse-pack minps 3 "_mm_min_ps":commutative t)
|
|
(def-binary-intrinsic max-ss float-sse-pack maxss 3 "_mm_max_ss")
|
|
(def-binary-intrinsic max-ps float-sse-pack maxps 3 "_mm_max_ps" :commutative t)
|
|
|
|
(def-unary-intrinsic sqrt-ss float-sse-pack sqrtss 20 "_mm_sqrt_ss" :partial t)
|
|
(def-unary-intrinsic sqrt-ps float-sse-pack sqrtps 20 "_mm_sqrt_ps")
|
|
(def-unary-intrinsic rsqrt-ss float-sse-pack rsqrtss 20 "_mm_rsqrt_ss" :partial t)
|
|
(def-unary-intrinsic rsqrt-ps float-sse-pack rsqrtps 20 "_mm_rsqrt_ps")
|
|
(def-unary-intrinsic rcp-ss float-sse-pack rcpss 13 "_mm_rcp_ss" :partial t)
|
|
(def-unary-intrinsic rcp-ps float-sse-pack rcpps 13 "_mm_rcp_ps")
|
|
|
|
;; Bitwise logic
|
|
|
|
#+sbcl
|
|
(def-not-intrinsic not-ps float-sse-pack xorps)
|
|
|
|
(def-binary-intrinsic and-ps float-sse-pack andps 1 "_mm_and_ps" :commutative t)
|
|
(def-binary-intrinsic andnot-ps float-sse-pack andnps 1 "_mm_andnot_ps")
|
|
(def-binary-intrinsic or-ps float-sse-pack orps 1 "_mm_or_ps" :commutative t)
|
|
(def-binary-intrinsic xor-ps float-sse-pack xorps 1 "_mm_xor_ps" :commutative t)
|
|
|
|
;; Comparisons
|
|
|
|
(def-binary-intrinsic =-ss float-sse-pack cmpss 3 "_mm_cmpeq_ss" :tags (:eq))
|
|
(def-binary-intrinsic =-ps float-sse-pack cmpps 3 "_mm_cmpeq_ps" :tags (:eq) :commutative t)
|
|
(def-binary-intrinsic <-ss float-sse-pack cmpss 3 "_mm_cmplt_ss" :tags (:lt))
|
|
(def-binary-intrinsic <-ps float-sse-pack cmpps 3 "_mm_cmplt_ps" :tags (:lt))
|
|
(def-binary-intrinsic <=-ss float-sse-pack cmpss 3 "_mm_cmple_ss" :tags (:le))
|
|
(def-binary-intrinsic <=-ps float-sse-pack cmpps 3 "_mm_cmple_ps" :tags (:le))
|
|
#+ecl
|
|
(def-binary-intrinsic >-ss float-sse-pack nil nil "_mm_cmpgt_ss")
|
|
#+ecl
|
|
(def-binary-intrinsic >-ps float-sse-pack nil nil "_mm_cmpgt_ps")
|
|
#+ecl
|
|
(def-binary-intrinsic >=-ss float-sse-pack nil nil "_mm_cmpge_ss")
|
|
#+ecl
|
|
(def-binary-intrinsic >=-ps float-sse-pack nil nil "_mm_cmpge_ps")
|
|
|
|
(def-binary-intrinsic /=-ss float-sse-pack cmpss 3 "_mm_cmpneq_ss" :tags (:neq))
|
|
(def-binary-intrinsic /=-ps float-sse-pack cmpps 3 "_mm_cmpneq_ps" :tags (:neq) :commutative t)
|
|
(def-binary-intrinsic /<-ss float-sse-pack cmpss 3 "_mm_cmpnlt_ss" :tags (:nlt))
|
|
(def-binary-intrinsic /<-ps float-sse-pack cmpps 3 "_mm_cmpnlt_ps" :tags (:nlt))
|
|
(def-binary-intrinsic /<=-ss float-sse-pack cmpss 3 "_mm_cmpnle_ss" :tags (:nle))
|
|
(def-binary-intrinsic /<=-ps float-sse-pack cmpps 3 "_mm_cmpnle_ps" :tags (:nle))
|
|
#+ecl
|
|
(def-binary-intrinsic />-ss float-sse-pack nil nil "_mm_cmpngt_ss")
|
|
#+ecl
|
|
(def-binary-intrinsic />-ps float-sse-pack nil nil "_mm_cmpngt_ps")
|
|
#+ecl
|
|
(def-binary-intrinsic />=-ss float-sse-pack nil nil "_mm_cmpnge_ss")
|
|
#+ecl
|
|
(def-binary-intrinsic />=-ps float-sse-pack nil nil "_mm_cmpnge_ps")
|
|
|
|
(def-binary-intrinsic cmpord-ss float-sse-pack cmpss 3 "_mm_cmpord_ss" :tags (:ord)) ; neither is NaN
|
|
(def-binary-intrinsic cmpord-ps float-sse-pack cmpps 3 "_mm_cmpord_ps" :tags (:ord) :commutative t)
|
|
(def-binary-intrinsic cmpunord-ss float-sse-pack cmpss 3 "_mm_cmpunord_ss" :tags (:unord))
|
|
(def-binary-intrinsic cmpunord-ps float-sse-pack cmpps 3 "_mm_cmpunord_ps" :tags (:unord) :commutative t)
|
|
|
|
(def-comparison-intrinsic =-ss? float-sse-pack comiss 3 "_mm_comieq_ss" :commutative t :tags (:e))
|
|
(def-comparison-intrinsic =-ssu? float-sse-pack ucomiss 3 "_mm_ucomieq_ss" :commutative t :tags (:e))
|
|
(def-comparison-intrinsic <-ss? float-sse-pack comiss 3 "_mm_comilt_ss" :tags (:b))
|
|
(def-comparison-intrinsic <-ssu? float-sse-pack ucomiss 3 "_mm_ucomilt_ss" :tags (:b))
|
|
(def-comparison-intrinsic <=-ss? float-sse-pack comiss 3 "_mm_comile_ss" :tags (:be))
|
|
(def-comparison-intrinsic <=-ssu? float-sse-pack ucomiss 3 "_mm_ucomile_ss" :tags (:be))
|
|
(def-comparison-intrinsic >-ss? float-sse-pack comiss 3 "_mm_comigt_ss" :tags (:a))
|
|
(def-comparison-intrinsic >-ssu? float-sse-pack ucomiss 3 "_mm_ucomigt_ss" :tags (:a))
|
|
(def-comparison-intrinsic >=-ss? float-sse-pack comiss 3 "_mm_comige_ss" :tags (:ae))
|
|
(def-comparison-intrinsic >=-ssu? float-sse-pack ucomiss 3 "_mm_ucomige_ss" :tags (:ae))
|
|
(def-comparison-intrinsic /=-ss? float-sse-pack comiss 3 "_mm_comineq_ss" :commutative t :tags (:ne))
|
|
(def-comparison-intrinsic /=-ssu? float-sse-pack ucomiss 3 "_mm_ucomineq_ss" :commutative t :tags (:ne))
|
|
|
|
;; Misc
|
|
|
|
(def-binary-intrinsic unpackhi-ps float-sse-pack unpckhps 1 "_mm_unpackhi_ps")
|
|
(def-binary-intrinsic unpacklo-ps float-sse-pack unpcklps 1 "_mm_unpacklo_ps")
|
|
|
|
(def-binary-intrinsic move-ss float-sse-pack movss 1 "_mm_move_ss")
|
|
|
|
(def-binary-intrinsic movehl-ps float-sse-pack movhlps 1 "_mm_movehl_ps")
|
|
(def-binary-intrinsic movelh-ps float-sse-pack movlhps 1 "_mm_movelh_ps")
|
|
|
|
(def-unary-intrinsic movemask-ps (unsigned-byte 4) movmskps 1 "_mm_movemask_ps" :arg-type float-sse-pack)
|
|
|
|
;; Shuffle
|
|
|
|
(def-binary-intrinsic shuffle-ps float-sse-pack shufps 1 "_mm_shuffle_ps" :immediate-arg (unsigned-byte 8))
|
|
|
|
;; Conversion
|
|
|
|
(def-unary-intrinsic convert-pi32-to-ps float-sse-pack cvtdq2ps 3 "_mm_cvtepi32_ps" :arg-type int-sse-pack)
|
|
(def-unary-intrinsic convert-ps-to-pi32 int-sse-pack cvtps2dq 3 "_mm_cvtps_epi32" :arg-type float-sse-pack)
|
|
(def-unary-intrinsic truncate-ps-to-pi32 int-sse-pack cvttps2dq 3 "_mm_cvttps_epi32" :arg-type float-sse-pack)
|
|
|
|
(def-sse-int-intrinsic convert-si32-to-ss (signed-byte 32) float-sse-pack cvtsi2ss 3 "_mm_cvtsi32_ss")
|
|
(def-cvt-to-int32-intrinsic convert-ss-to-si32 (signed-byte 32) cvtss2si 3 "_mm_cvtss_si32" :arg-type float-sse-pack)
|
|
(def-cvt-to-int32-intrinsic truncate-ss-to-si32 (signed-byte 32) cvttss2si 3 "_mm_cvttss_si32" :arg-type float-sse-pack)
|
|
|
|
#+(or x86_64 x86-64)
|
|
(def-sse-int-intrinsic convert-si64-to-ss (signed-byte 64) float-sse-pack cvtsi2ss 3
|
|
#-msvc "_mm_cvtsi64_ss" #+msvc "_mm_cvtsi64x_ss")
|
|
#+(or x86_64 x86-64)
|
|
(def-unary-intrinsic convert-ss-to-si64 (signed-byte 64) cvtss2si 3
|
|
#-msvc "_mm_cvtss_si64" #+msvc "_mm_cvtss_si64x" :arg-type float-sse-pack)
|
|
#+(or x86_64 x86-64)
|
|
(def-unary-intrinsic truncate-ss-to-si64 (signed-byte 64) cvttss2si 3
|
|
#-msvc "_mm_cvttss_si64" #+msvc "_mm_cvttss_si64x" :arg-type float-sse-pack)
|
|
|
|
;;; Double-float
|
|
|
|
;; Initialization
|
|
|
|
#+sbcl
|
|
(def-float-set-intrinsic set-sd %set-sd double-float double-reg double-sse-pack movapd)
|
|
|
|
#+ecl
|
|
(progn
|
|
(def-intrinsic set-sd (double-float) double-sse-pack "_mm_set_sd")
|
|
(def-intrinsic set1-pd (double-float) double-sse-pack "_mm_set1_pd")
|
|
|
|
(def-intrinsic set-pd (double-float double-float) double-sse-pack "_mm_set_pd")
|
|
(def-intrinsic setr-pd (double-float double-float) double-sse-pack "_mm_setr_pd")
|
|
|
|
(def-intrinsic setzero-pd () double-sse-pack "_mm_setzero_pd"))
|
|
|
|
;; Memory
|
|
|
|
(def-load-intrinsic mem-ref-sd double-sse-pack movsd "_mm_load_sd")
|
|
|
|
(def-load-intrinsic mem-ref-pd double-sse-pack movupd "_mm_loadu_pd")
|
|
(def-load-intrinsic mem-ref-apd double-sse-pack movapd "_mm_load_pd")
|
|
|
|
(def-load-intrinsic loadh-pd double-sse-pack movhpd "_mm_loadh_pd" :register-arg t)
|
|
(def-load-intrinsic loadl-pd double-sse-pack movlpd "_mm_loadl_pd" :register-arg t)
|
|
|
|
(def-store-intrinsic mem-set-sd double-sse-pack movsd "_mm_store_sd" :setf-name mem-ref-sd)
|
|
|
|
(def-store-intrinsic mem-set-pd double-sse-pack movupd "_mm_storeu_pd" :setf-name mem-ref-pd)
|
|
(def-store-intrinsic mem-set-apd double-sse-pack movapd "_mm_store_pd" :setf-name mem-ref-apd)
|
|
|
|
(def-store-intrinsic storeh-pd double-sse-pack movhpd "_mm_storeh_pd")
|
|
(def-store-intrinsic storel-pd double-sse-pack movlpd "_mm_storel_pd")
|
|
|
|
(def-store-intrinsic stream-pd double-sse-pack movntpd "_mm_stream_pd")
|
|
|
|
;; Arithmetics
|
|
|
|
(def-binary-intrinsic add-sd double-sse-pack addsd 3 "_mm_add_sd")
|
|
(def-binary-intrinsic add-pd double-sse-pack addpd 3 "_mm_add_pd" :commutative t)
|
|
(def-binary-intrinsic sub-sd double-sse-pack subsd 3 "_mm_sub_sd")
|
|
(def-binary-intrinsic sub-pd double-sse-pack subpd 3 "_mm_sub_pd")
|
|
(def-binary-intrinsic mul-sd double-sse-pack mulsd 5 "_mm_mul_sd")
|
|
(def-binary-intrinsic mul-pd double-sse-pack mulpd 5 "_mm_mul_pd" :commutative t)
|
|
(def-binary-intrinsic div-sd double-sse-pack divsd 13 "_mm_div_sd")
|
|
(def-binary-intrinsic div-pd double-sse-pack divpd 13 "_mm_div_pd")
|
|
(def-binary-intrinsic min-sd double-sse-pack minsd 3 "_mm_min_sd")
|
|
(def-binary-intrinsic min-pd double-sse-pack minpd 3 "_mm_min_pd" :commutative t)
|
|
(def-binary-intrinsic max-sd double-sse-pack maxsd 3 "_mm_max_sd")
|
|
(def-binary-intrinsic max-pd double-sse-pack maxpd 3 "_mm_max_pd" :commutative t)
|
|
|
|
(def-binary-intrinsic sqrt-sd double-sse-pack sqrtsd 20 "_mm_sqrt_sd")
|
|
(def-unary-intrinsic sqrt-pd double-sse-pack sqrtpd 20 "_mm_sqrt_pd")
|
|
|
|
;; Bitwise logic
|
|
|
|
#+sbcl
|
|
(def-not-intrinsic not-pd double-sse-pack xorpd)
|
|
|
|
(def-binary-intrinsic and-pd double-sse-pack andpd 1 "_mm_and_pd" :commutative t)
|
|
(def-binary-intrinsic andnot-pd double-sse-pack andnpd 1 "_mm_andnot_pd")
|
|
(def-binary-intrinsic or-pd double-sse-pack orpd 1 "_mm_or_pd" :commutative t)
|
|
(def-binary-intrinsic xor-pd double-sse-pack xorpd 1 "_mm_xor_pd" :commutative t)
|
|
|
|
;; Comparisons
|
|
|
|
(def-binary-intrinsic =-sd double-sse-pack cmpsd 3 "_mm_cmpeq_sd" :tags (:eq))
|
|
(def-binary-intrinsic =-pd double-sse-pack cmppd 3 "_mm_cmpeq_pd" :tags (:eq) :commutative t)
|
|
(def-binary-intrinsic <-sd double-sse-pack cmpsd 3 "_mm_cmplt_sd" :tags (:lt))
|
|
(def-binary-intrinsic <-pd double-sse-pack cmppd 3 "_mm_cmplt_pd" :tags (:lt))
|
|
(def-binary-intrinsic <=-sd double-sse-pack cmpsd 3 "_mm_cmple_sd" :tags (:le))
|
|
(def-binary-intrinsic <=-pd double-sse-pack cmppd 3 "_mm_cmple_pd" :tags (:le))
|
|
#+ecl
|
|
(def-binary-intrinsic >-sd double-sse-pack nil nil "_mm_cmpgt_sd")
|
|
#+ecl
|
|
(def-binary-intrinsic >-pd double-sse-pack nil nil "_mm_cmpgt_pd")
|
|
#+ecl
|
|
(def-binary-intrinsic >=-sd double-sse-pack nil nil "_mm_cmpge_sd")
|
|
#+ecl
|
|
(def-binary-intrinsic >=-pd double-sse-pack nil nil "_mm_cmpge_pd")
|
|
|
|
(def-binary-intrinsic /=-sd double-sse-pack cmpsd 3 "_mm_cmpneq_sd" :tags (:neq))
|
|
(def-binary-intrinsic /=-pd double-sse-pack cmppd 3 "_mm_cmpneq_pd" :tags (:neq) :commutative t)
|
|
(def-binary-intrinsic /<-sd double-sse-pack cmpsd 3 "_mm_cmpnlt_sd" :tags (:nlt))
|
|
(def-binary-intrinsic /<-pd double-sse-pack cmppd 3 "_mm_cmpnlt_pd" :tags (:nlt))
|
|
(def-binary-intrinsic /<=-sd double-sse-pack cmpsd 3 "_mm_cmpnle_sd" :tags (:nle))
|
|
(def-binary-intrinsic /<=-pd double-sse-pack cmppd 3 "_mm_cmpnle_pd" :tags (:nle))
|
|
#+ecl
|
|
(def-binary-intrinsic />-sd double-sse-pack nil nil "_mm_cmpngt_sd")
|
|
#+ecl
|
|
(def-binary-intrinsic />-pd double-sse-pack nil nil "_mm_cmpngt_pd")
|
|
#+ecl
|
|
(def-binary-intrinsic />=-sd double-sse-pack nil nil "_mm_cmpnge_sd")
|
|
#+ecl
|
|
(def-binary-intrinsic />=-pd double-sse-pack nil nil "_mm_cmpnge_pd")
|
|
|
|
(def-binary-intrinsic cmpord-sd double-sse-pack cmpsd 3 "_mm_cmpord_sd" :tags (:ord)) ; neither is NaN
|
|
(def-binary-intrinsic cmpord-pd double-sse-pack cmppd 3 "_mm_cmpord_pd" :tags (:ord) :commutative t)
|
|
(def-binary-intrinsic cmpunord-sd double-sse-pack cmpsd 3 "_mm_cmpunord_sd" :tags (:unord))
|
|
(def-binary-intrinsic cmpunord-pd double-sse-pack cmppd 3 "_mm_cmpunord_pd" :tags (:unord) :commutative t)
|
|
|
|
(def-comparison-intrinsic =-sd? double-sse-pack comisd 3 "_mm_comieq_sd" :commutative t :tags (:e))
|
|
(def-comparison-intrinsic =-sdu? double-sse-pack ucomisd 3 "_mm_ucomieq_sd" :commutative t :tags (:e))
|
|
(def-comparison-intrinsic <-sd? double-sse-pack comisd 3 "_mm_comilt_sd" :tags (:b))
|
|
(def-comparison-intrinsic <-sdu? double-sse-pack ucomisd 3 "_mm_ucomilt_sd" :tags (:b))
|
|
(def-comparison-intrinsic <=-sd? double-sse-pack comisd 3 "_mm_comile_sd" :tags (:be))
|
|
(def-comparison-intrinsic <=-sdu? double-sse-pack ucomisd 3 "_mm_ucomile_sd" :tags (:be))
|
|
(def-comparison-intrinsic >-sd? double-sse-pack comisd 3 "_mm_comigt_sd" :tags (:a))
|
|
(def-comparison-intrinsic >-sdu? double-sse-pack ucomisd 3 "_mm_ucomigt_sd" :tags (:a))
|
|
(def-comparison-intrinsic >=-sd? double-sse-pack comisd 3 "_mm_comige_sd" :tags (:ae))
|
|
(def-comparison-intrinsic >=-sdu? double-sse-pack ucomisd 3 "_mm_ucomige_sd" :tags (:ae))
|
|
(def-comparison-intrinsic /=-sd? double-sse-pack comisd 3 "_mm_comineq_sd" :commutative t :tags (:ne))
|
|
(def-comparison-intrinsic /=-sdu? double-sse-pack ucomisd 3 "_mm_ucomineq_sd" :commutative t :tags (:ne))
|
|
|
|
;; Misc
|
|
|
|
(def-binary-intrinsic unpackhi-pd double-sse-pack unpckhpd 1 "_mm_unpackhi_pd")
|
|
(def-binary-intrinsic unpacklo-pd double-sse-pack unpcklpd 1 "_mm_unpacklo_pd")
|
|
|
|
(def-binary-intrinsic move-sd double-sse-pack movsd 1 "_mm_move_sd")
|
|
|
|
(def-unary-intrinsic movemask-pd (unsigned-byte 2) movmskpd 1 "_mm_movemask_pd" :arg-type double-sse-pack)
|
|
|
|
;; Shuffle
|
|
|
|
(def-binary-intrinsic shuffle-pd double-sse-pack shufpd 1 "_mm_shuffle_pd" :immediate-arg (unsigned-byte 2))
|
|
|
|
;; Conversion
|
|
|
|
(def-unary-intrinsic convert-ps-to-pd double-sse-pack cvtps2pd 3 "_mm_cvtps_pd" :arg-type float-sse-pack)
|
|
(def-unary-intrinsic convert-pd-to-ps float-sse-pack cvtpd2ps 3 "_mm_cvtpd_ps" :arg-type double-sse-pack)
|
|
|
|
(def-binary-intrinsic convert-ss-to-sd double-sse-pack cvtss2sd 3 "_mm_cvtss_sd" :y-type float-sse-pack)
|
|
(def-binary-intrinsic convert-sd-to-ss float-sse-pack cvtsd2ss 3 "_mm_cvtsd_ss" :y-type double-sse-pack)
|
|
|
|
(def-unary-intrinsic convert-pi32-to-pd double-sse-pack cvtdq2pd 3 "_mm_cvtepi32_pd" :arg-type int-sse-pack)
|
|
(def-unary-intrinsic convert-pd-to-pi32 int-sse-pack cvtpd2dq 3 "_mm_cvtpd_epi32" :arg-type double-sse-pack)
|
|
(def-unary-intrinsic truncate-pd-to-pi32 int-sse-pack cvttpd2dq 3 "_mm_cvttpd_epi32" :arg-type double-sse-pack)
|
|
|
|
(def-sse-int-intrinsic convert-si32-to-sd (signed-byte 32) double-sse-pack cvtsi2ss 3 "_mm_cvtsi32_sd")
|
|
(def-cvt-to-int32-intrinsic convert-sd-to-si32 (signed-byte 32) cvtsd2si 3 "_mm_cvtsd_si32" :arg-type double-sse-pack)
|
|
(def-cvt-to-int32-intrinsic truncate-sd-to-si32 (signed-byte 32) cvttsd2si 3 "_mm_cvttsd_si32" :arg-type double-sse-pack)
|
|
|
|
#+(or x86_64 x86-64)
|
|
(def-sse-int-intrinsic convert-si64-to-sd (signed-byte 64) double-sse-pack cvtsi2ss 3
|
|
#-msvc "_mm_cvtsi64_sd" #+msvc "_mm_cvtsi64x_sd")
|
|
#+(or x86_64 x86-64)
|
|
(def-unary-intrinsic convert-sd-to-si64 (signed-byte 64) cvtsd2si 3
|
|
#-msvc "_mm_cvtsd_si64" #+msvc "_mm_cvtsd_si64x" :arg-type double-sse-pack)
|
|
#+(or x86_64 x86-64)
|
|
(def-unary-intrinsic truncate-sd-to-si64 (signed-byte 64) cvttsd2si 3
|
|
#-msvc "_mm_cvttsd_si64" #+msvc "_mm_cvttsd_si64x" :arg-type double-sse-pack)
|
|
|
|
;;; Integer
|
|
|
|
;; Initialization
|
|
|
|
#+ecl
|
|
(progn
|
|
(def-intrinsic set1-pi8 (fixnum) int-sse-pack "_mm_set1_epi8")
|
|
(def-intrinsic set1-pi16 (fixnum) int-sse-pack "_mm_set1_epi16")
|
|
(def-intrinsic set1-pi32 (ext:integer32) int-sse-pack "_mm_set1_epi32")
|
|
#+x86_64
|
|
(def-intrinsic set1-pi64 (ext:integer64) int-sse-pack "_mm_set1_epi64x")
|
|
|
|
(def-intrinsic set1-pu32 (ext:byte32) int-sse-pack "_mm_set1_epi32")
|
|
#+x86_64
|
|
(def-intrinsic set1-pu64 (ext:byte64) int-sse-pack "_mm_set1_epi64x")
|
|
|
|
;;-----
|
|
(def-intrinsic set-pi8 (fixnum fixnum fixnum fixnum
|
|
fixnum fixnum fixnum fixnum
|
|
fixnum fixnum fixnum fixnum
|
|
fixnum fixnum fixnum fixnum) int-sse-pack "_mm_set_epi8")
|
|
(def-intrinsic set-pi16 (fixnum fixnum fixnum fixnum
|
|
fixnum fixnum fixnum fixnum) int-sse-pack "_mm_set_epi16")
|
|
(def-intrinsic set-pi32 (ext:integer32 ext:integer32 ext:integer32 ext:integer32) int-sse-pack "_mm_set_epi32")
|
|
#+x86_64
|
|
(def-intrinsic set-pi64 (ext:integer64 ext:integer64) int-sse-pack "_mm_set_epi64x")
|
|
|
|
(def-intrinsic set-pu32 (ext:byte32 ext:byte32 ext:byte32 ext:byte32) int-sse-pack "_mm_set_epi32")
|
|
#+x86_64
|
|
(def-intrinsic set-pu64 (ext:byte64 ext:byte64) int-sse-pack "_mm_set_epi64x")
|
|
|
|
;;-----
|
|
(def-intrinsic setr-pi8 (fixnum fixnum fixnum fixnum
|
|
fixnum fixnum fixnum fixnum
|
|
fixnum fixnum fixnum fixnum
|
|
fixnum fixnum fixnum fixnum) int-sse-pack "_mm_setr_epi8")
|
|
(def-intrinsic setr-pi16 (fixnum fixnum fixnum fixnum
|
|
fixnum fixnum fixnum fixnum) int-sse-pack "_mm_setr_epi16")
|
|
(def-intrinsic setr-pi32 (ext:integer32 ext:integer32 ext:integer32 ext:integer32) int-sse-pack "_mm_setr_epi32")
|
|
#+x86_64
|
|
(def-intrinsic setr-pi64 (ext:integer64 ext:integer64) int-sse-pack "_mm_set_epi64x" :reorder-args t)
|
|
|
|
(def-intrinsic setr-pu32 (ext:byte32 ext:byte32 ext:byte32 ext:byte32) int-sse-pack "_mm_setr_epi32")
|
|
#+x86_64
|
|
(def-intrinsic setr-pu64 (ext:byte64 ext:byte64) int-sse-pack "_mm_set_epi64x" :reorder-args t)
|
|
|
|
;;-----
|
|
(def-intrinsic setzero-pi () int-sse-pack "_mm_setzero_si128"))
|
|
|
|
;; Memory
|
|
|
|
(def-load-intrinsic mem-ref-pi int-sse-pack movdqu "_mm_loadu_si128")
|
|
(def-load-intrinsic mem-ref-api int-sse-pack movdqa "_mm_load_si128")
|
|
|
|
(def-load-intrinsic mem-ref-si64 int-sse-pack movd "_mm_loadl_epi64")
|
|
|
|
(def-store-intrinsic mem-set-pi int-sse-pack movdqu "_mm_storeu_si128" :setf-name mem-ref-pi)
|
|
(def-store-intrinsic mem-set-api int-sse-pack movdqa "_mm_store_si128" :setf-name mem-ref-api)
|
|
|
|
(def-store-intrinsic mem-set-si64 int-sse-pack movd "_mm_storel_epi64" :setf-name mem-ref-si64)
|
|
|
|
(def-store-intrinsic stream-pi int-sse-pack movntdq "_mm_stream_si128")
|
|
|
|
;; Masked move
|
|
|
|
#+ecl
|
|
(def-mem-intrinsic maskmoveu-pi "char" nil "_mm_maskmoveu_si128" :prefix-args (int-sse-pack int-sse-pack))
|
|
|
|
#+sbcl
|
|
(progn
|
|
(defknown %maskmoveu-pi (sse-pack sse-pack system-area-pointer fixnum) (values) (unsafe))
|
|
|
|
(define-vop (%maskmoveu-pi)
|
|
(:translate %maskmoveu-pi)
|
|
(:args (value :scs (sse-reg))
|
|
(mask :scs (sse-reg))
|
|
(sap :scs (sap-reg) :target rdi)
|
|
(offset :scs (signed-reg)))
|
|
(:arg-types sse-pack sse-pack system-area-pointer signed-num)
|
|
(:temporary (:sc sap-reg :offset rdi-offset :from :eval) rdi)
|
|
(:policy :fast-safe)
|
|
(:note "inline MASKMOVEU operation")
|
|
(:generator 5
|
|
(if (location= sap rdi)
|
|
(inst add rdi offset)
|
|
(inst lea rdi (make-ea :qword :base sap :index offset)))
|
|
(inst maskmovdqu value mask)))
|
|
|
|
(define-vop (%maskmoveu-pi-c)
|
|
(:translate %maskmoveu-pi)
|
|
(:args (value :scs (sse-reg))
|
|
(mask :scs (sse-reg))
|
|
(sap :scs (sap-reg) :target rdi))
|
|
(:arg-types sse-pack sse-pack system-area-pointer (:constant (signed-byte 32)))
|
|
(:info offset)
|
|
(:temporary (:sc sap-reg :offset rdi-offset :from :eval) rdi)
|
|
(:policy :fast-safe)
|
|
(:note "inline MASKMOVEU operation")
|
|
(:generator 4
|
|
(if (location= sap rdi)
|
|
(unless (= offset 0)
|
|
(inst add rdi offset))
|
|
(if (= offset 0)
|
|
(inst mov rdi sap)
|
|
(inst lea rdi (make-ea :qword :base sap :disp offset))))
|
|
(inst maskmovdqu value mask)))
|
|
|
|
(def-splice-transform %maskmoveu-pi (value mask (sap+ sap offset1) offset2)
|
|
(%maskmoveu-pi value mask sap (+ offset1 offset2))))
|
|
|
|
;; Arithmetics
|
|
|
|
(def-binary-intrinsic add-pi8 int-sse-pack paddb 1 "_mm_add_epi8" :commutative t)
|
|
(def-binary-intrinsic add-pi16 int-sse-pack paddw 1 "_mm_add_epi16" :commutative t)
|
|
(def-binary-intrinsic add-pi32 int-sse-pack paddd 1 "_mm_add_epi32" :commutative t)
|
|
(def-binary-intrinsic add-pi64 int-sse-pack paddq 1 "_mm_add_epi64" :commutative t)
|
|
|
|
(def-binary-intrinsic adds-pi8 int-sse-pack paddsb 1 "_mm_adds_epi8" :commutative t)
|
|
(def-binary-intrinsic adds-pi16 int-sse-pack paddsw 1 "_mm_adds_epi16" :commutative t)
|
|
(def-binary-intrinsic adds-pu8 int-sse-pack paddusb 1 "_mm_adds_epu8" :commutative t)
|
|
(def-binary-intrinsic adds-pu16 int-sse-pack paddusw 1 "_mm_adds_epu16" :commutative t)
|
|
|
|
(def-binary-intrinsic avg-pu8 int-sse-pack pavgb 1 "_mm_avg_epu8" :commutative t)
|
|
(def-binary-intrinsic avg-pu16 int-sse-pack pavgw 1 "_mm_avg_epu16" :commutative t)
|
|
|
|
(def-binary-intrinsic madd-pi16 int-sse-pack pmaddwd 1 "_mm_madd_epi16" :commutative t)
|
|
|
|
(def-binary-intrinsic max-pu8 int-sse-pack pmaxub 1 "_mm_max_epu8" :commutative t)
|
|
(def-binary-intrinsic max-pi16 int-sse-pack pmaxsw 1 "_mm_max_epi16" :commutative t)
|
|
(def-binary-intrinsic min-pu8 int-sse-pack pminub 1 "_mm_min_epu8" :commutative t)
|
|
(def-binary-intrinsic min-pi16 int-sse-pack pminsw 1 "_mm_min_epi16" :commutative t)
|
|
|
|
(def-binary-intrinsic mulhi-pi16 int-sse-pack pmulhw 3 "_mm_mulhi_epi16" :commutative t)
|
|
(def-binary-intrinsic mulhi-pu16 int-sse-pack pmulhuw 3 "_mm_mulhi_epu16" :commutative t)
|
|
(def-binary-intrinsic mullo-pi16 int-sse-pack pmullw 3 "_mm_mullo_epi16" :commutative t)
|
|
|
|
(def-binary-intrinsic mul-pu32 int-sse-pack pmuludq 3 "_mm_mul_epu32" :commutative t)
|
|
|
|
(def-binary-intrinsic sad-pu8 int-sse-pack psadbw 1 "_mm_sad_epu8" :commutative t)
|
|
|
|
(def-binary-intrinsic sub-pi8 int-sse-pack psubb 1 "_mm_sub_epi8")
|
|
(def-binary-intrinsic sub-pi16 int-sse-pack psubw 1 "_mm_sub_epi16")
|
|
(def-binary-intrinsic sub-pi32 int-sse-pack psubd 1 "_mm_sub_epi32")
|
|
(def-binary-intrinsic sub-pi64 int-sse-pack psubq 1 "_mm_sub_epi64")
|
|
|
|
(def-binary-intrinsic subs-pi8 int-sse-pack psubsb 1 "_mm_subs_epi8")
|
|
(def-binary-intrinsic subs-pi16 int-sse-pack psubsw 1 "_mm_subs_epi16")
|
|
(def-binary-intrinsic subs-pu8 int-sse-pack psubusb 1 "_mm_subs_epu8")
|
|
(def-binary-intrinsic subs-pu16 int-sse-pack psubusw 1 "_mm_subs_epu16")
|
|
|
|
;; Bitwise logic
|
|
|
|
#+sbcl
|
|
(def-not-intrinsic not-pi int-sse-pack pxor)
|
|
|
|
(def-binary-intrinsic and-pi int-sse-pack pand 1 "_mm_and_si128" :commutative t)
|
|
(def-binary-intrinsic andnot-pi int-sse-pack pandn 1 "_mm_andnot_si128")
|
|
(def-binary-intrinsic or-pi int-sse-pack por 1 "_mm_or_si128" :commutative t)
|
|
(def-binary-intrinsic xor-pi int-sse-pack pxor 1 "_mm_xor_si128" :commutative t)
|
|
|
|
;; Shifts
|
|
|
|
(def-unary-intrinsic slli-pi int-sse-pack pslldq 1 "_mm_slli_si128" :partial :one-arg :immediate-arg (unsigned-byte 8))
|
|
|
|
(def-sse-int-intrinsic slli-pi16 fixnum int-sse-pack psllw 3 "_mm_slli_epi16" :make-temporary t
|
|
:defun-body "_mm_sll_epi16(#0,_mm_cvtsi32_si128(#1))")
|
|
(def-sse-int-intrinsic slli-pi32 fixnum int-sse-pack pslld 3 "_mm_slli_epi32" :make-temporary t
|
|
:defun-body "_mm_sll_epi32(#0,_mm_cvtsi32_si128(#1))")
|
|
(def-sse-int-intrinsic slli-pi64 fixnum int-sse-pack psllq 3 "_mm_slli_epi64" :make-temporary t
|
|
:defun-body "_mm_sll_epi64(#0,_mm_cvtsi32_si128(#1))")
|
|
(def-binary-intrinsic sll-pi16 int-sse-pack psllw 1 "_mm_sll_epi16")
|
|
(def-binary-intrinsic sll-pi32 int-sse-pack pslld 1 "_mm_sll_epi32")
|
|
(def-binary-intrinsic sll-pi64 int-sse-pack psllq 1 "_mm_sll_epi64")
|
|
|
|
(def-sse-int-intrinsic srai-pi16 fixnum int-sse-pack psraw 3 "_mm_srai_epi16" :make-temporary t
|
|
:defun-body "_mm_sra_epi16(#0,_mm_cvtsi32_si128(#1))")
|
|
(def-sse-int-intrinsic srai-pi32 fixnum int-sse-pack psrad 3 "_mm_srai_epi32" :make-temporary t
|
|
:defun-body "_mm_sra_epi32(#0,_mm_cvtsi32_si128(#1))")
|
|
(def-binary-intrinsic sra-pi16 int-sse-pack psraw 1 "_mm_sra_epi16")
|
|
(def-binary-intrinsic sra-pi32 int-sse-pack psrad 1 "_mm_sra_epi32")
|
|
|
|
(def-unary-intrinsic srli-pi int-sse-pack psrldq 1 "_mm_srli_si128" :partial :one-arg :immediate-arg (unsigned-byte 8))
|
|
|
|
(def-sse-int-intrinsic srli-pi16 fixnum int-sse-pack psrlw 3 "_mm_srli_epi16" :make-temporary t
|
|
:defun-body "_mm_srl_epi16(#0,_mm_cvtsi32_si128(#1))")
|
|
(def-sse-int-intrinsic srli-pi32 fixnum int-sse-pack psrld 3 "_mm_srli_epi32" :make-temporary t
|
|
:defun-body "_mm_srl_epi32(#0,_mm_cvtsi32_si128(#1))")
|
|
(def-sse-int-intrinsic srli-pi64 fixnum int-sse-pack psrlq 3 "_mm_srli_epi64" :make-temporary t
|
|
:defun-body "_mm_srl_epi64(#0,_mm_cvtsi32_si128(#1))")
|
|
(def-binary-intrinsic srl-pi16 int-sse-pack psrlw 1 "_mm_srl_epi16")
|
|
(def-binary-intrinsic srl-pi32 int-sse-pack psrld 1 "_mm_srl_epi32")
|
|
(def-binary-intrinsic srl-pi64 int-sse-pack psrlq 1 "_mm_srl_epi64")
|
|
|
|
#+sbcl
|
|
(macrolet ((defimm (name insn bits &key arithmetic)
|
|
`(define-vop (,(symbolicate "%" name "-IMM") sse-int-base-op)
|
|
(:translate ,name)
|
|
(:args (x :scs (sse-reg) :target r))
|
|
(:arg-types sse-pack (:constant fixnum))
|
|
(:result-types sb-kernel:int-sse-pack)
|
|
(:info immv)
|
|
(:generator 1
|
|
,@(let ((core `(progn
|
|
(ensure-move int-sse-pack r x)
|
|
(unless (= immv 0)
|
|
(inst ,insn r immv)))))
|
|
(if arithmetic
|
|
`((when (or (< immv 0) (>= immv ,bits))
|
|
(setf immv ,bits))
|
|
,core)
|
|
`((if (or (< immv 0) (>= immv ,bits))
|
|
(inst pxor r r)
|
|
,core))))))))
|
|
(defimm slli-pi16 psllw-imm 16)
|
|
(defimm slli-pi32 pslld-imm 32)
|
|
(defimm slli-pi64 psllq-imm 64)
|
|
(defimm srai-pi16 psraw-imm 16 :arithmetic t)
|
|
(defimm srai-pi32 psrad-imm 32 :arithmetic t)
|
|
(defimm srli-pi16 psrlw-imm 16)
|
|
(defimm srli-pi32 psrld-imm 32)
|
|
(defimm srli-pi64 psrlq-imm 64))
|
|
|
|
;; Comparisons
|
|
|
|
(def-binary-intrinsic =-pi8 int-sse-pack pcmpeqb 1 "_mm_cmpeq_epi8")
|
|
(def-binary-intrinsic =-pi16 int-sse-pack pcmpeqw 1 "_mm_cmpeq_epi16")
|
|
(def-binary-intrinsic =-pi32 int-sse-pack pcmpeqd 1 "_mm_cmpeq_epi32")
|
|
|
|
#+ecl
|
|
(def-binary-intrinsic <-pi8 int-sse-pack nil nil "_mm_cmplt_epi8")
|
|
#+ecl
|
|
(def-binary-intrinsic <-pi16 int-sse-pack nil nil "_mm_cmplt_epi16")
|
|
#+ecl
|
|
(def-binary-intrinsic <-pi32 int-sse-pack nil nil "_mm_cmplt_epi32")
|
|
|
|
(def-binary-intrinsic >-pi8 int-sse-pack pcmpgtb 1 "_mm_cmpgt_epi8")
|
|
(def-binary-intrinsic >-pi16 int-sse-pack pcmpgtw 1 "_mm_cmpgt_epi16")
|
|
(def-binary-intrinsic >-pi32 int-sse-pack pcmpgtd 1 "_mm_cmpgt_epi32")
|
|
|
|
;; Misc
|
|
|
|
(def-binary-intrinsic packs-pi16 int-sse-pack packsswb 1 "_mm_packs_epi16")
|
|
(def-binary-intrinsic packs-pi32 int-sse-pack packssdw 1 "_mm_packs_epi32")
|
|
(def-binary-intrinsic packus-pi16 int-sse-pack packuswb 1 "_mm_packus_epi16")
|
|
|
|
(def-unary-intrinsic extract-pi16 (unsigned-byte 16) pextrw 1 "_mm_extract_epi16"
|
|
:immediate-arg (unsigned-byte 8) :arg-type int-sse-pack)
|
|
(def-sse-int-intrinsic insert-pi16 fixnum int-sse-pack pinsrw 1 "_mm_insert_epi16"
|
|
:immediate-arg (unsigned-byte 8))
|
|
|
|
(def-unary-intrinsic movemask-pi8 (unsigned-byte 16) pmovmskb 1 "_mm_movemask_epi8" :arg-type int-sse-pack)
|
|
|
|
(def-binary-intrinsic unpackhi-pi8 int-sse-pack punpckhbw 1 "_mm_unpackhi_epi8")
|
|
(def-binary-intrinsic unpackhi-pi16 int-sse-pack punpckhwd 1 "_mm_unpackhi_epi16")
|
|
(def-binary-intrinsic unpackhi-pi32 int-sse-pack punpckhdq 1 "_mm_unpackhi_epi32")
|
|
(def-binary-intrinsic unpackhi-pi64 int-sse-pack punpckhqdq 1 "_mm_unpackhi_epi64")
|
|
|
|
(def-binary-intrinsic unpacklo-pi8 int-sse-pack punpcklbw 1 "_mm_unpacklo_epi8")
|
|
(def-binary-intrinsic unpacklo-pi16 int-sse-pack punpcklwd 1 "_mm_unpacklo_epi16")
|
|
(def-binary-intrinsic unpacklo-pi32 int-sse-pack punpckldq 1 "_mm_unpacklo_epi32")
|
|
(def-binary-intrinsic unpacklo-pi64 int-sse-pack punpcklqdq 1 "_mm_unpacklo_epi64")
|
|
|
|
(def-unary-intrinsic move-pi64 int-sse-pack movq 1 "_mm_move_epi64")
|
|
|
|
;; Shuffle
|
|
|
|
(def-unary-intrinsic shuffle-pi32 int-sse-pack pshufd 1 "_mm_shuffle_epi32" :immediate-arg (unsigned-byte 8))
|
|
(def-unary-intrinsic shufflelo-pi16 int-sse-pack pshuflw 1 "_mm_shufflelo_epi16" :immediate-arg (unsigned-byte 8))
|
|
(def-unary-intrinsic shufflehi-pi16 int-sse-pack pshufhw 1 "_mm_shufflehi_epi16" :immediate-arg (unsigned-byte 8))
|
|
|
|
;; Conversion
|
|
|
|
#+sbcl
|
|
(progn
|
|
(export 'convert-si32-to-pi)
|
|
(defknown convert-si32-to-pi ((signed-byte 32)) int-sse-pack (foldable flushable))
|
|
(export 'convert-su32-to-pi)
|
|
(defknown convert-su32-to-pi ((unsigned-byte 32)) int-sse-pack (foldable flushable))
|
|
(export 'convert-si64-to-pi)
|
|
(defknown convert-si64-to-pi ((signed-byte 64)) int-sse-pack (foldable flushable))
|
|
(export 'convert-su64-to-pi)
|
|
(defknown convert-su64-to-pi ((unsigned-byte 64)) int-sse-pack (foldable flushable))
|
|
(defknown %set-int ((signed-byte 64)) int-sse-pack (foldable flushable always-translatable))
|
|
(defknown %set-uint ((unsigned-byte 64)) int-sse-pack (foldable flushable always-translatable))
|
|
|
|
(define-vop (%set-int)
|
|
(:translate %set-int %set-uint
|
|
convert-si32-to-pi convert-su32-to-pi
|
|
convert-si64-to-pi convert-su64-to-pi)
|
|
(:args (arg :scs (signed-reg unsigned-reg signed-stack unsigned-stack)))
|
|
(:arg-types untagged-num)
|
|
(:results (dst :scs (sse-reg)))
|
|
(:result-types sb-kernel:int-sse-pack)
|
|
(:policy :fast-safe)
|
|
(:generator 1
|
|
(inst movd dst arg))))
|
|
|
|
#+ecl
|
|
(progn
|
|
(def-intrinsic convert-si32-to-pi (ext:integer32) int-sse-pack "_mm_cvtsi32_si128")
|
|
(def-intrinsic convert-su32-to-pi (ext:byte32) int-sse-pack "_mm_cvtsi32_si128")
|
|
#+x86_64
|
|
(def-intrinsic convert-si64-to-pi (ext:integer64) int-sse-pack #-msvc "_mm_cvtsi64_si128" #+msvc "_mm_cvtsi64x_si128")
|
|
#+x86_64
|
|
(def-intrinsic convert-su64-to-pi (ext:byte64) int-sse-pack #-msvc "_mm_cvtsi64_si128" #+msvc "_mm_cvtsi64x_si128"))
|
|
|
|
(def-cvt-to-int32-intrinsic convert-pi-to-si32 (signed-byte 32) movd 1 "_mm_cvtsi128_si32"
|
|
:arg-type int-sse-pack)
|
|
(def-unary-intrinsic convert-pi-to-su32 (unsigned-byte 32) movd 1 "_mm_cvtsi128_si32"
|
|
:result-size :dword :arg-type int-sse-pack)
|
|
|
|
#+(or x86_64 x86-64)
|
|
(def-unary-intrinsic convert-pi-to-si64 (signed-byte 64) movd 1
|
|
#-msvc "_mm_cvtsi128_si64" #+msvc "_mm_cvtsi128_si64x" :arg-type int-sse-pack)
|
|
#+(or x86_64 x86-64)
|
|
(def-unary-intrinsic convert-pi-to-su64 (unsigned-byte 64) movd 1
|
|
#-msvc "_mm_cvtsi128_si64" #+msvc "_mm_cvtsi128_si64x" :arg-type int-sse-pack)
|
|
|