diff --git a/contrib/cl-simd/cl-simd.texinfo b/contrib/cl-simd/cl-simd.texinfo index c28fb927b..41c862aba 100644 --- a/contrib/cl-simd/cl-simd.texinfo +++ b/contrib/cl-simd/cl-simd.texinfo @@ -21,6 +21,7 @@ ECL and SBCL (x86-64 only). * SSE pack types:: * SSE array type:: * Differences from C intrinsics:: +* Comparisons and NaN handling:: * Simple extensions:: * Lisp array accessors:: * Example:: @@ -123,6 +124,10 @@ for @code{cmpleps}, or @code{/>-ps} for @code{cmpngtps}. In some places the set of comparison functions is extended to cover the full possible range. +@item +Scalar comparison predicates are named like @code{..-ss?} for +@code{comiss}, and @code{..-ssu?} for @code{ucomiss} wrappers. + @item Conversion functions are renamed to @code{convert-*-to-*} and @code{truncate-*-to-*}. @@ -151,6 +156,54 @@ and made SETF-able: (The @code{-ap*} version requires alignment.) +@node Comparisons and NaN handling +@subsection Comparisons and NaN handling + +Floating-point arithmetic intrinsics have trivial IEEE semantics +when given QNaN and SNaN arguments. Comparisons have more complex +behavior, detailed in the following table: + +@multitable { @code{/>=-ss, />=-ps} } { @code{/>=-sd, />=-pd} } { Not greater or equal } { Result for NaN } { QNaN traps } +@item Single-float @tab Double-float @tab Condition @tab Result for NaN @tab QNaN traps +@item @code{=-ss}, @code{=-ps} @tab @code{=-sd}, @code{=-pd} @tab Equal @tab False @tab No +@item @code{<-ss}, @code{<-ps} @tab @code{<-sd}, @code{<-pd} @tab Less @tab False @tab Yes +@item @code{<=-ss}, @code{<=-ps} @tab @code{<=-sd}, @code{<=-pd} @tab Less or equal @tab False @tab Yes +@item @code{>-ss}, @code{>-ps} @tab @code{>-sd}, @code{>-pd} @tab Greater @tab False @tab Yes +@item @code{>=-ss}, @code{>=-ps} @tab @code{>=-sd}, @code{>=-pd} @tab Greater or equal @tab False @tab Yes +@item @code{/=-ss}, @code{/=-ps} @tab @code{/=-sd}, @code{/=-pd} @tab Not equal @tab True @tab No +@item @code{/<-ss}, @code{/<-ps} @tab @code{/<-sd}, @code{/<-pd} @tab Not less @tab True @tab Yes +@item @code{/<=-ss}, @code{/<=-ps} @tab @code{/<=-sd}, @code{/<=-pd} @tab Not less or equal @tab True @tab Yes +@item @code{/>-ss}, @code{/>-ps} @tab @code{/>-sd}, @code{/>-pd} @tab Not greater @tab True @tab Yes +@item @code{/>=-ss}, @code{/>=-ps} @tab @code{/>=-sd}, @code{/>=-pd} @tab Not greater or equal @tab True @tab Yes +@item @code{cmpord-ss}, @code{cmpord-ps} @tab @code{cmpord-sd}, @code{cmpord-pd} +@tab Ordered, i.e. no NaN args @tab False @tab No +@item @code{cmpunord-ss}, @code{cmpunord-ps} @tab @code{cmpunord-sd}, @code{cmpunord-pd} +@tab Unordered, i.e. with NaN args @tab True @tab No +@end multitable + +Likewise for scalar comparison predicates, i.e. functions that return the +result of the comparison as a Lisp boolean instead of a bitmask sse-pack: + +@multitable { Single-float } { Double-float } { Not greater or equal } { Result for NaN } { QNaN traps } +@item Single-float @tab Double-float @tab Condition @tab Result for NaN @tab QNaN traps +@item @code{=-ss?} @tab @code{=-sd?} @tab Equal @tab True @tab Yes +@item @code{=-ssu?} @tab @code{=-sdu?} @tab Equal @tab True @tab No +@item @code{<-ss?} @tab @code{<-sd?} @tab Less @tab True @tab Yes +@item @code{<-ssu?} @tab @code{<-sdu?} @tab Less @tab True @tab No +@item @code{<=-ss?} @tab @code{<=-sd?} @tab Less or equal @tab True @tab Yes +@item @code{<=-ssu?} @tab @code{<=-sdu?} @tab Less or equal @tab True @tab No +@item @code{>-ss?} @tab @code{>-sd?} @tab Greater @tab False @tab Yes +@item @code{>-ssu?} @tab @code{>-sdu?} @tab Greater @tab False @tab No +@item @code{>=-ss?} @tab @code{>=-sd?} @tab Greater or equal @tab False @tab Yes +@item @code{>=-ssu?} @tab @code{>=-sdu?} @tab Greater or equal @tab False @tab No +@item @code{/=-ss?} @tab @code{/=-sd?} @tab Not equal @tab False @tab Yes +@item @code{/=-ssu?} @tab @code{/=-sdu?} @tab Not equal @tab False @tab No +@end multitable + +Note that MSDN specifies different return values for the C counterparts of some +of these functions when called with NaN arguments, but that seems to disagree +with the actually generated code. + @node Simple extensions @subsection Simple extensions @@ -213,6 +266,9 @@ this module implements a set of AREF-like memory accessors: @item @code{(ROW-MAJOR-)?AREF-[AS]?P[SDI]} for whole-pack read & write. + +@item +@code{(ROW-MAJOR-)?AREF-S(S|D|I64)} for scalar read & write. @end itemize (Where A = aligned; S = aligned streamed write.) @@ -222,14 +278,14 @@ array or vector, without restriction on the precise element type (although it should be declared at compile time to ensure generation of the fastest code). -Additional index bound checking is done to ensure that 16 +Additional index bound checking is done to ensure that enough bytes of memory are accessible after the specified index. As an exception, ROW-MAJOR-AREF-PREFETCH-* does not do any range checks at all, because the prefetch instructions are officially safe to use with bad addresses. The AREF-PREFETCH-* and *-CLFLUSH functions do only ordinary -index checks without the 16-byte extension. +index checks without the usual 16-byte extension. @node Example @subsection Example diff --git a/contrib/cl-simd/ecl-sse-core.lisp b/contrib/cl-simd/ecl-sse-core.lisp index 71e5006f2..01abdded9 100644 --- a/contrib/cl-simd/ecl-sse-core.lisp +++ b/contrib/cl-simd/ecl-sse-core.lisp @@ -32,6 +32,7 @@ (int-sse-pack :int-sse-pack) (float-sse-pack :float-sse-pack) (double-sse-pack :double-sse-pack) + (boolean :bool) (single-float :float) (double-float :double) (fixnum :fixnum) @@ -107,7 +108,7 @@ (c::def-inline ',name ',mode ',arg-types ',ret-type ,call-str ,@flags))) (defmacro def-intrinsic (name arg-types ret-type c-name - &key (export t) ret-arg reorder-args immediate-args) + &key (export t) ret-arg reorder-args immediate-args defun-body) "Defines and exports an SSE intrinsic function with matching open-coding rules." (let* ((anums (make-arg-nums arg-types)) (asyms (mapcar #'make-arg-name anums)) @@ -130,7 +131,7 @@ ,@(if (null immediate-args) `((defun ,name ,asyms (declare (optimize (speed 0) (debug 0) (safety 1))) - (ffi:c-inline ,asyms ,aftypes ,rftype ,call-str :one-liner t)))) + (ffi:c-inline ,asyms ,aftypes ,rftype ,(or defun-body call-str) :one-liner t)))) (def-inline ,name :always ,(mapcar #'inline-arg-type-of arg-types) ,rftype ,call-str :inline-or-warn t)))) @@ -155,10 +156,15 @@ ,ret-type ,c-name :immediate-args ,(if immediate-arg `((arg2 ,immediate-arg))))) (defmacro def-sse-int-intrinsic (name int-type ret-type insn cost c-name - &key (arg-type ret-type) immediate-arg make-temporary) + &key (arg-type ret-type) immediate-arg make-temporary defun-body) (declare (ignore insn cost make-temporary)) `(def-intrinsic ,name (,arg-type ,int-type ,@(if immediate-arg (list immediate-arg))) - ,ret-type ,c-name :immediate-args ,(if immediate-arg `((arg2 ,immediate-arg))))) + ,ret-type ,c-name :immediate-args ,(if immediate-arg `((arg2 ,immediate-arg))) + :defun-body ,defun-body)) + +(defmacro def-comparison-intrinsic (name arg-type insn cost c-name &key commutative tags) + (declare (ignore insn cost commutative tags)) + `(def-intrinsic ,name (,arg-type ,arg-type) boolean ,c-name)) (defmacro %def-aref-intrinsic (tag val-type c-type reader writer &key (aux-args "") (bsize 16)) "Defines and exports macros and functios that implement vectorized array access." @@ -236,11 +242,10 @@ ,(fmtw "(&(#0)->array.self.~A[#1])" (second spec)))) known-elt-types))))))) -(defmacro def-aref-intrinsic (tag val-type reader-fun writer-fun &key (check-bounds t)) +(defmacro def-aref-intrinsic (tag val-type reader-fun writer-fun &key (ref-size 16)) `(%def-aref-intrinsic ,tag ,val-type ,(pointer-c-type-of val-type) ,(get reader-fun 'c-function-name) ,(get writer-fun 'c-function-name) - :bsize ,(ecase check-bounds - (t 16) ((nil) 0) (:no-gap 1)) + :bsize ,ref-size :aux-args ,(get reader-fun 'c-call-aux-args))) (defmacro def-mem-intrinsic (name c-type ret-type c-name &key (public t) diff --git a/contrib/cl-simd/sbcl-arrays.lisp b/contrib/cl-simd/sbcl-arrays.lisp index a2bff1b36..0e9ad26b6 100644 --- a/contrib/cl-simd/sbcl-arrays.lisp +++ b/contrib/cl-simd/sbcl-arrays.lisp @@ -157,7 +157,7 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY (array-bounding-indices-bad-error ,array ,index (+ ,index ,access-size))) ,@code)))))) -(defun sse-array-info-or-give-up (lvar) +(defun sse-array-info-or-give-up (lvar ref-size) ;; Look up the SSE element size and check if it is definitely a vector (let ((type (lvar-type lvar))) (unless (and (array-type-p type) @@ -170,12 +170,12 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY (unless shift (give-up-ir1-transform "not a known SSE-compatible array element type: ~S" (type-specifier etype))) - (values (ash 1 shift) ; step - (1- (ash 16 (- shift))) ; gap + (values (ash 1 shift) ; step + (ash (1- ref-size) (- shift)) ; gap (and (listp (array-type-dimensions type)) (if (null (cdr (array-type-dimensions type))) :yes :no)))))) -(defmacro def-aref-intrinsic (postfix rtype reader writer &key (check-bounds t)) +(defmacro def-aref-intrinsic (postfix rtype reader writer &key (ref-size 16)) (let* ((rm-aref (symbolicate "ROW-MAJOR-AREF-" postfix)) (rm-aset (if writer (symbolicate "ROW-MAJOR-ASET-" postfix))) (aref (symbolicate "AREF-" postfix)) @@ -186,10 +186,9 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY (writer/ix-vop (if writer (symbolicate "%" writer "/IX"))) (rtype (or rtype '(values))) (index-expression - (ecase check-bounds - (t ``(the index (%check-bound array (%sse-array-size array ,gap) index))) - ((nil) ``(the index index)) - (:no-gap ``(the index (%check-bound array (%sse-array-size array 0) index)))))) + (if (= ref-size 0) + ``(the signed-word index) + ``(the signed-word (%check-bound array (%sse-array-size array ,gap) index))))) `(progn ;; ROW-MAJOR-AREF (export ',rm-aref) @@ -197,11 +196,11 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY (defun ,rm-aref (array index) (with-sse-data ((sap data array) (offset index)) - (,reader-vop sap offset))) + (,reader-vop sap offset 1 0))) ;; (deftransform ,rm-aref ((array index) (simple-array t) * :important t) ,(format nil "open-code ~A" rm-aref) - (multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array) + (multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array ,ref-size) (declare (ignorable gap)) `(,',reader/ix-vop (array-data-expr array ,is-vector) ,,index-expression @@ -213,14 +212,14 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY (declare (truly-dynamic-extent indices)) (with-sse-data ((sap data array) (offset (%array-row-major-index array indices))) - (,reader-vop sap offset))) + (,reader-vop sap offset 1 0))) ;; (defoptimizer (,aref derive-type) ((array &rest indices) node) (assert-array-rank array (length indices)) (values-specifier-type ',rtype)) (deftransform ,aref ((array &rest indices) (simple-array &rest t) * :important t) ,(format nil "open-code ~A" aref) - (multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array) + (multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array ,ref-size) (declare (ignorable gap)) (let ((syms (make-gensym-list (length indices)))) `(lambda (array ,@syms) @@ -236,12 +235,12 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY (defun ,rm-aset (array index new-value) (with-sse-data ((sap data array) (offset index)) - (,writer-vop sap offset (the ,rtype new-value)) + (,writer-vop sap offset 1 0 (the ,rtype new-value)) new-value)) ;; (deftransform ,rm-aset ((array index value) (simple-array t t) * :important t) ,(format nil "open-code ~A" rm-aset) - (multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array) + (multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array ,ref-size) (declare (ignorable gap)) `(progn (,',writer/ix-vop (array-data-expr array ,is-vector) @@ -256,7 +255,7 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY (let ((new-value (car (last stuff)))) (with-sse-data ((sap data array) (offset (%array-row-major-index array (nbutlast stuff)))) - (,writer-vop sap offset (the ,rtype new-value)) + (,writer-vop sap offset 1 0 (the ,rtype new-value)) new-value))) ;; (defoptimizer (,aset derive-type) ((array &rest stuff) node) @@ -266,12 +265,12 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY (specifier-type ',rtype)) (deftransform ,aset ((array &rest stuff) (simple-array &rest t) * :important t) ,(format nil "open-code ~A" aset) - (multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array) + (multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array ,ref-size) (declare (ignorable gap)) (let ((syms (make-gensym-list (length stuff)))) `(lambda (array ,@syms) - (let ((index (the index ,(if (eq is-vector :yes) (first syms) - `(array-row-major-index array ,@(butlast syms)))))) + (let ((index ,(if (eq is-vector :yes) (first syms) + `(array-row-major-index array ,@(butlast syms))))) (,',writer/ix-vop (array-data-expr array ,is-vector) ,,index-expression ,step ,+vector-data-fixup+ diff --git a/contrib/cl-simd/sbcl-core.lisp b/contrib/cl-simd/sbcl-core.lisp index c32b9d032..3d9124f18 100644 --- a/contrib/cl-simd/sbcl-core.lisp +++ b/contrib/cl-simd/sbcl-core.lisp @@ -59,27 +59,55 @@ ;;; Index-offset splicing -(defun fold-index-addressing (fun-name index scale &key setter-p) - (multiple-value-bind (func index-args) (extract-fun-args index '(+ - * ash) 2) +(defun skip-casts (lvar) + (let ((inside (lvar-uses lvar))) + (if (and (cast-p inside) + (policy inside (= sb-c::type-check 0))) + (skip-casts (cast-value inside)) + lvar))) + +(defun delete-casts (lvar) + (loop for inside = (lvar-uses lvar) + while (cast-p inside) + do (delete-filter inside lvar (cast-value inside)))) + +(defun fold-index-addressing (fun-name index scale offset &key prefix-args postfix-args) + (multiple-value-bind (func index-args) + (extract-fun-args (skip-casts index) '(+ - * ash) 2) (destructuring-bind (x constant) index-args (declare (ignorable x)) (unless (constant-lvar-p constant) (give-up-ir1-transform)) (let ((value (lvar-value constant)) - (scale-value (lvar-value scale))) - (case func - (* (unless (typep (* value scale-value) '(signed-byte 32)) - (give-up-ir1-transform "constant is too large for inlining"))) - (ash (unless (and (>= value 0) - (typep (ash scale-value value) '(signed-byte 32))) - (give-up-ir1-transform "index shift is unsuitable for inlining")))) - (splice-fun-args index func 2) - (let* ((value-arg (when setter-p '(value))) - (is-scale (member func '(* ash))) - (new-scale (if is-scale `(,func scale const) 'scale)) - (new-offset (if is-scale 'offset `(,func offset (* const scale))))) - `(lambda (thing index const scale offset ,@value-arg) - (,fun-name thing index ,new-scale ,new-offset ,@value-arg))))))) + (scale-value (lvar-value scale)) + (offset-value (lvar-value offset))) + (unless (integerp value) + (give-up-ir1-transform)) + (multiple-value-bind (new-scale new-offset) + (ecase func + (+ (values scale-value (+ offset-value (* value scale-value)))) + (- (values scale-value (- offset-value (* value scale-value)))) + (* (values (* scale-value value) offset-value)) + (ash (unless (>= value 0) + (give-up-ir1-transform "negative index shift")) + (values (ash scale-value value) offset-value))) + (unless (and (typep new-scale '(signed-byte 32)) + (typep new-offset 'signed-word)) + (give-up-ir1-transform "constant is too large for inlining")) + (delete-casts index) + (splice-fun-args index func 2) + `(lambda (,@prefix-args thing index const scale offset ,@postfix-args) + (declare (ignore const scale offset)) + (,fun-name ,@prefix-args thing (the signed-word index) ,new-scale ,new-offset ,@postfix-args))))))) + +(deftransform fold-ref-index-addressing ((thing index scale offset) * * :defun-only t :node node) + (fold-index-addressing (lvar-fun-name (basic-combination-fun node)) index scale offset)) + +(deftransform fold-xmm-ref-index-addressing ((value thing index scale offset) * * :defun-only t :node node) + (fold-index-addressing (lvar-fun-name (basic-combination-fun node)) index scale offset :prefix-args '(value))) + +(deftransform fold-set-index-addressing ((thing index scale offset value) * * :defun-only t :node node) + (fold-index-addressing (lvar-fun-name (basic-combination-fun node)) index scale offset :postfix-args '(value))) ;;; Index-offset addressing @@ -367,8 +395,8 @@ May emit additional instructions using the temporary register." (iv :scs (unsigned-reg unsigned-stack immediate))) (:arg-types sse-pack unsigned-num)) -(defmacro def-sse-int-intrinsic (&whole whole name itype rtype insn cost c-name &key make-temporary immediate-arg) - (declare (ignore c-name)) +(defmacro def-sse-int-intrinsic (&whole whole name itype rtype insn cost c-name &key make-temporary immediate-arg defun-body) + (declare (ignore c-name defun-body)) (let* ((imm (if immediate-arg '(imm))) (immt (if immediate-arg (list immediate-arg))) (unsigned? (subtypep itype 'unsigned-byte))) @@ -393,6 +421,40 @@ May emit additional instructions using the temporary register." make-temporary) (inst ,insn r ,(if make-temporary 'tmp '(ensure-reg-or-mem iv)) ,@imm)))))) +;;; Comparison predicate intrinsics + +(define-vop (sse-comparison-op) + (:args (x :scs (sse-reg)) + (y :scs (sse-reg sse-pack-immediate))) + (:arg-types sse-pack sse-pack) + (:policy :fast-safe) + (:note "inline SSE binary comparison predicate") + (:vop-var vop) + (:save-p :compute-only)) + +(define-vop (sse-comparison-comm-op sse-comparison-op) + (:args (x :scs (sse-reg) + :load-if (not (and (sc-is x sse-pack-immediate) + (sc-is y sse-reg)))) + (y :scs (sse-reg sse-pack-immediate)))) + +(defmacro def-comparison-intrinsic (&whole whole name arg-type insn cost c-name &key commutative tags) + (declare (ignore arg-type c-name)) + (let* () + `(progn + (export ',name) + (save-intrinsic-spec ,name ,whole) + (defknown ,name (sse-pack sse-pack) boolean (foldable flushable)) + (define-vop (,name ,(if commutative 'sse-comparison-comm-op 'sse-comparison-op)) + (:translate ,name) + (:conditional ,@tags) + (:generator ,cost + ,(if commutative + `(if (sc-is x sse-reg) + (inst ,insn x y) + (inst ,insn y x)) + `(inst ,insn x y))))))) + ;;; Memory intrinsics (define-vop (sse-load-base-op) @@ -401,40 +463,50 @@ May emit additional instructions using the temporary register." (:note "inline SSE load operation")) (define-vop (sse-load-op sse-load-base-op) - (:args (sap :scs (sap-reg)) - (offset :scs (signed-reg))) - (:arg-types system-area-pointer signed-num)) + (:args (sap :scs (sap-reg) :to :eval) + (index :scs (signed-reg immediate) :target tmp)) + (:arg-types system-area-pointer signed-num + (:constant fixnum) (:constant signed-word)) + (:temporary (:sc signed-reg :from (:argument 1)) tmp) + (:info scale offset)) + +(define-vop (sse-load-op/tag sse-load-base-op) + (:args (sap :scs (sap-reg) :to :eval) + (index :scs (any-reg signed-reg immediate) :target tmp)) + (:arg-types system-area-pointer tagged-num + (:constant tagged-load-scale) (:constant signed-word)) + (:temporary (:sc any-reg :from (:argument 1)) tmp) + (:info scale offset)) (define-vop (sse-xmm-load-op sse-load-base-op) (:args (value :scs (sse-reg sse-pack-immediate) :target r) - (sap :scs (sap-reg)) - (offset :scs (signed-reg))) - (:arg-types sse-pack system-area-pointer signed-num)) + (sap :scs (sap-reg) :to :eval) + (index :scs (signed-reg immediate) :target tmp)) + (:arg-types sse-pack system-area-pointer signed-num + (:constant fixnum) (:constant signed-word)) + (:temporary (:sc signed-reg :from (:argument 2)) tmp) + (:info scale offset)) -(define-vop (sse-load-imm-op sse-load-base-op) - (:args (sap :scs (sap-reg))) - (:arg-types system-area-pointer - (:constant (signed-byte 32))) - (:info offset)) - -(define-vop (sse-xmm-load-imm-op sse-load-base-op) +(define-vop (sse-xmm-load-op/tag sse-load-base-op) (:args (value :scs (sse-reg sse-pack-immediate) :target r) - (sap :scs (sap-reg))) - (:arg-types sse-pack system-area-pointer - (:constant (signed-byte 32))) - (:info offset)) + (sap :scs (sap-reg) :to :eval) + (index :scs (any-reg signed-reg immediate) :target tmp)) + (:arg-types sse-pack system-area-pointer tagged-num + (:constant tagged-load-scale) (:constant signed-word)) + (:temporary (:sc any-reg :from (:argument 2)) tmp) + (:info scale offset)) (define-vop (sse-load-ix-op sse-load-base-op) (:args (sap :scs (descriptor-reg) :to :eval) (index :scs (signed-reg immediate) :target tmp)) - (:arg-types * signed-num (:constant fixnum) (:constant fixnum)) + (:arg-types * signed-num (:constant fixnum) (:constant signed-word)) (:temporary (:sc signed-reg :from (:argument 1)) tmp) (:info scale offset)) (define-vop (sse-load-ix-op/tag sse-load-base-op) (:args (sap :scs (descriptor-reg) :to :eval) (index :scs (any-reg signed-reg immediate) :target tmp)) - (:arg-types * tagged-num (:constant tagged-load-scale) (:constant fixnum)) + (:arg-types * tagged-num (:constant tagged-load-scale) (:constant signed-word)) (:temporary (:sc any-reg :from (:argument 1)) tmp) (:info scale offset)) @@ -442,10 +514,8 @@ May emit additional instructions using the temporary register." &key register-arg tags postfix-fmt (size :qword)) (declare (ignore c-name postfix-fmt)) (let* ((vop (symbolicate "%" name)) - (c-vop (symbolicate vop "-C")) (ix-vop (symbolicate vop "/IX")) (valtype (if register-arg '(sse-pack))) - (valarg (if register-arg '(value))) (r-arg (if rtype '(r))) (rtypes (if rtype `(:result-types ,(type-name-to-primitive rtype)) @@ -454,24 +524,26 @@ May emit additional instructions using the temporary register." `(progn (export ',name) (save-intrinsic-spec ,name ,whole) - (defknown ,vop (,@valtype system-area-pointer fixnum) ,(or rtype '(values)) (flushable always-translatable)) + (defknown ,vop (,@valtype system-area-pointer signed-word fixnum signed-word) + ,(or rtype '(values)) (flushable always-translatable)) (define-vop (,vop ,(if register-arg 'sse-xmm-load-op 'sse-load-op)) (:translate ,vop) ,rtypes (:generator 5 ,(if register-arg `(ensure-load ,rtype r value)) - (inst ,insn ,@tags ,@r-arg (make-ea ,size :base sap :index offset)))) - (define-vop (,c-vop ,(if register-arg 'sse-xmm-load-imm-op 'sse-load-imm-op)) + (inst ,insn ,@tags ,@r-arg (make-scaled-ea ,size sap index scale offset tmp)))) + (define-vop (,(symbolicate vop "/TAG") ,(if register-arg 'sse-xmm-load-op/tag 'sse-load-op/tag)) (:translate ,vop) ,rtypes (:generator 4 ,(if register-arg `(ensure-load ,rtype r value)) - (inst ,insn ,@tags ,@r-arg (make-ea ,size :base sap :disp offset)))) - (def-splice-transform ,vop (,@valarg (sap+ sap offset1) offset2) - (,vop ,@valarg sap (+ offset1 offset2))) + (inst ,insn ,@tags ,@r-arg (make-scaled-ea ,size sap index scale offset tmp :fixnum-index t)))) + (%deftransform ',vop '(function * *) + #',(if register-arg 'fold-xmm-ref-index-addressing 'fold-ref-index-addressing) + "fold semi-constant offset expressions") ,@(if (null register-arg) `(;; Vector indexing version - (defknown ,ix-vop (simple-array fixnum fixnum fixnum) ,(or rtype '(values)) + (defknown ,ix-vop (simple-array signed-word fixnum signed-word) ,(or rtype '(values)) (flushable always-translatable)) (define-vop (,ix-vop sse-load-ix-op) (:translate ,ix-vop) @@ -483,33 +555,34 @@ May emit additional instructions using the temporary register." ,rtypes (:generator 3 (inst ,insn ,@tags ,@r-arg (make-scaled-ea ,size sap index scale offset tmp :fixnum-index t)))) - (deftransform ,ix-vop ((thing index scale offset)) - "fold semi-constant index expressions" - (fold-index-addressing ',ix-vop index scale))))))) + (%deftransform ',ix-vop '(function * *) #'fold-ref-index-addressing + "fold semi-constant index expressions")))))) (define-vop (sse-store-base-op) (:policy :fast-safe) (:note "inline SSE store operation")) (define-vop (sse-store-op sse-store-base-op) - (:args (sap :scs (sap-reg)) - (offset :scs (signed-reg)) + (:args (sap :scs (sap-reg) :to :eval) + (index :scs (signed-reg immediate) :target tmp) (value :scs (sse-reg))) - (:arg-types system-area-pointer signed-num sse-pack)) + (:arg-types system-area-pointer signed-num (:constant fixnum) (:constant signed-word) sse-pack) + (:temporary (:sc signed-reg :from (:argument 1)) tmp) + (:info scale offset)) -(define-vop (sse-store-imm-op sse-store-base-op) - (:args (sap :scs (sap-reg)) +(define-vop (sse-store-op/tag sse-store-base-op) + (:args (sap :scs (sap-reg) :to :eval) + (index :scs (any-reg signed-reg immediate) :target tmp) (value :scs (sse-reg))) - (:arg-types system-area-pointer - (:constant (signed-byte 32)) - sse-pack) - (:info offset)) + (:arg-types system-area-pointer tagged-num (:constant tagged-load-scale) (:constant signed-word) sse-pack) + (:temporary (:sc any-reg :from (:argument 1)) tmp) + (:info scale offset)) (define-vop (sse-store-ix-op sse-store-base-op) (:args (sap :scs (descriptor-reg) :to :eval) (index :scs (signed-reg immediate) :target tmp) (value :scs (sse-reg))) - (:arg-types * signed-num (:constant fixnum) (:constant fixnum) sse-pack) + (:arg-types * signed-num (:constant fixnum) (:constant signed-word) sse-pack) (:temporary (:sc signed-reg :from (:argument 1)) tmp) (:info scale offset)) @@ -517,31 +590,31 @@ May emit additional instructions using the temporary register." (:args (sap :scs (descriptor-reg) :to :eval) (index :scs (any-reg signed-reg immediate) :target tmp) (value :scs (sse-reg))) - (:arg-types * tagged-num (:constant tagged-load-scale) (:constant fixnum) sse-pack) + (:arg-types * tagged-num (:constant tagged-load-scale) (:constant signed-word) sse-pack) (:temporary (:sc any-reg :from (:argument 1)) tmp) (:info scale offset)) (defmacro def-store-intrinsic (&whole whole name rtype insn c-name &key setf-name) (declare (ignore rtype c-name)) (let* ((vop (symbolicate "%" name)) - (c-vop (symbolicate vop "-C")) (ix-vop (symbolicate vop "/IX"))) `(progn ,(unless setf-name `(export ',name)) (save-intrinsic-spec ,name ,whole) - (defknown ,vop (system-area-pointer fixnum sse-pack) (values) (unsafe always-translatable)) + (defknown ,vop (system-area-pointer signed-word fixnum signed-word sse-pack) (values) + (unsafe always-translatable)) (define-vop (,vop sse-store-op) (:translate ,vop) (:generator 5 - (inst ,insn (make-ea :qword :base sap :index offset) value))) - (define-vop (,c-vop sse-store-imm-op) + (inst ,insn (make-scaled-ea :qword sap index scale offset tmp) value))) + (define-vop (,(symbolicate vop "/TAG") sse-store-op/tag) (:translate ,vop) (:generator 4 - (inst ,insn (make-ea :qword :base sap :disp offset) value))) - (def-splice-transform ,vop ((sap+ sap offset1) offset2 new-value) - (,vop sap (+ offset1 offset2) new-value)) + (inst ,insn (make-scaled-ea :qword sap index scale offset tmp :fixnum-index t) value))) + (%deftransform ',vop '(function * *) #'fold-set-index-addressing + "fold semi-constant offset expressions") ;; Vector indexing version - (defknown ,ix-vop (simple-array fixnum fixnum fixnum sse-pack) (values) + (defknown ,ix-vop (simple-array signed-word fixnum signed-word sse-pack) (values) (unsafe always-translatable)) (define-vop (,ix-vop sse-store-ix-op) (:translate ,ix-vop) @@ -551,7 +624,6 @@ May emit additional instructions using the temporary register." (:translate ,ix-vop) (:generator 3 (inst ,insn (make-scaled-ea :qword sap index scale offset tmp :fixnum-index t) value))) - (deftransform ,ix-vop ((thing index scale offset value)) - "fold semi-constant index expressions" - (fold-index-addressing ',ix-vop index scale :setter-p t))))) + (%deftransform ',ix-vop '(function * *) #'fold-set-index-addressing + "fold semi-constant index expressions")))) diff --git a/contrib/cl-simd/sbcl-functions.lisp b/contrib/cl-simd/sbcl-functions.lisp index 0c836cff6..a3f745d21 100644 --- a/contrib/cl-simd/sbcl-functions.lisp +++ b/contrib/cl-simd/sbcl-functions.lisp @@ -45,6 +45,11 @@ (declare (type sse-pack x) (type ,itype iv)) (truly-the ,rtype (%primitive ,name x iv))))) + (def-comparison-intrinsic (name arg-type insn cost c-name &key &allow-other-keys) + (declare (ignore insn cost c-name arg-type)) + `(defun ,name (x y) + (declare (type sse-pack x y)) + (truly-the boolean (,name x y)))) (def-load-intrinsic (name rtype insn c-name &key register-arg &allow-other-keys) (declare (ignore insn c-name)) (let* ((vop (symbolicate "%" name)) @@ -54,10 +59,10 @@ (defun ,name (,@valarg pointer &optional (offset 0)) (declare ,@(if register-arg '((type sse-pack value))) (type system-area-pointer pointer) - (type fixnum offset)) + (type signed-word offset)) ,(if rtype - `(truly-the ,rtype (,vop ,@valarg pointer offset)) - `(,vop ,@valarg pointer offset)))))) + `(truly-the ,rtype (,vop ,@valarg pointer offset 1 0)) + `(,vop ,@valarg pointer offset 1 0)))))) (def-store-intrinsic (name rtype insn c-name &key setf-name &allow-other-keys) (declare (ignore insn c-name)) (let* ((vop (symbolicate "%" name))) @@ -66,8 +71,8 @@ (defun ,name (pointer value &optional (offset 0)) (declare (type system-area-pointer pointer) (type sse-pack value) - (type fixnum offset)) - (,vop pointer offset value) + (type signed-word offset)) + (,vop pointer offset 1 0 value) (truly-the ,rtype value)) ,(if setf-name `(defsetf ,setf-name (pointer &optional (offset 0)) (value) diff --git a/contrib/cl-simd/sse-array-defs.lisp b/contrib/cl-simd/sse-array-defs.lisp index 040f1ea39..3613b7776 100644 --- a/contrib/cl-simd/sse-array-defs.lisp +++ b/contrib/cl-simd/sse-array-defs.lisp @@ -10,15 +10,19 @@ ;;; Prefetch: AREF-PREFETCH-*, ROW-MAJOR-AREF-PREFETCH-* -(def-aref-intrinsic #:PREFETCH-T0 nil cpu-prefetch-t0 nil :check-bounds nil) -(def-aref-intrinsic #:PREFETCH-T1 nil cpu-prefetch-t1 nil :check-bounds nil) -(def-aref-intrinsic #:PREFETCH-T2 nil cpu-prefetch-t2 nil :check-bounds nil) -(def-aref-intrinsic #:PREFETCH-NTA nil cpu-prefetch-nta nil :check-bounds nil) +(def-aref-intrinsic #:PREFETCH-T0 nil cpu-prefetch-t0 nil :ref-size 0) +(def-aref-intrinsic #:PREFETCH-T1 nil cpu-prefetch-t1 nil :ref-size 0) +(def-aref-intrinsic #:PREFETCH-T2 nil cpu-prefetch-t2 nil :ref-size 0) +(def-aref-intrinsic #:PREFETCH-NTA nil cpu-prefetch-nta nil :ref-size 0) -(def-aref-intrinsic #:CLFLUSH nil cpu-clflush nil :check-bounds :no-gap) +(def-aref-intrinsic #:CLFLUSH nil cpu-clflush nil :ref-size 1) ;;; Single-float +;; AREF-SS, ROW-MAJOR-AREF-SS + +(def-aref-intrinsic #:SS float-sse-pack mem-ref-ss mem-set-ss :ref-size 4) + ;; AREF-PS, ROW-MAJOR-AREF-PS (def-aref-intrinsic #:PS float-sse-pack mem-ref-ps mem-set-ps) @@ -33,6 +37,10 @@ ;;; Double-float +;; AREF-SD, ROW-MAJOR-AREF-SD + +(def-aref-intrinsic #:SD double-sse-pack mem-ref-sd mem-set-sd :ref-size 8) + ;; AREF-PD, ROW-MAJOR-AREF-PD (def-aref-intrinsic #:PD double-sse-pack mem-ref-pd mem-set-pd) @@ -47,6 +55,10 @@ ;;; Integer +;; AREF-SI64, ROW-MAJOR-AREF-SI64 + +(def-aref-intrinsic #:SI64 int-sse-pack mem-ref-si64 mem-set-si64 :ref-size 8) + ;; AREF-PI, ROW-MAJOR-AREF-PI (def-aref-intrinsic #:PI int-sse-pack mem-ref-pi mem-set-pi) diff --git a/contrib/cl-simd/sse-intrinsics.lisp b/contrib/cl-simd/sse-intrinsics.lisp index a92586a70..6592fd703 100644 --- a/contrib/cl-simd/sse-intrinsics.lisp +++ b/contrib/cl-simd/sse-intrinsics.lisp @@ -198,7 +198,18 @@ (def-binary-intrinsic cmpunord-ss float-sse-pack cmpss 3 "_mm_cmpunord_ss" :tags (:unord)) (def-binary-intrinsic cmpunord-ps float-sse-pack cmpps 3 "_mm_cmpunord_ps" :tags (:unord) :commutative t) -#| Skipped: _mm_u?comi.*_ss |# +(def-comparison-intrinsic =-ss? float-sse-pack comiss 3 "_mm_comieq_ss" :commutative t :tags (:e)) +(def-comparison-intrinsic =-ssu? float-sse-pack ucomiss 3 "_mm_ucomieq_ss" :commutative t :tags (:e)) +(def-comparison-intrinsic <-ss? float-sse-pack comiss 3 "_mm_comilt_ss" :tags (:b)) +(def-comparison-intrinsic <-ssu? float-sse-pack ucomiss 3 "_mm_ucomilt_ss" :tags (:b)) +(def-comparison-intrinsic <=-ss? float-sse-pack comiss 3 "_mm_comile_ss" :tags (:be)) +(def-comparison-intrinsic <=-ssu? float-sse-pack ucomiss 3 "_mm_ucomile_ss" :tags (:be)) +(def-comparison-intrinsic >-ss? float-sse-pack comiss 3 "_mm_comigt_ss" :tags (:a)) +(def-comparison-intrinsic >-ssu? float-sse-pack ucomiss 3 "_mm_ucomigt_ss" :tags (:a)) +(def-comparison-intrinsic >=-ss? float-sse-pack comiss 3 "_mm_comige_ss" :tags (:ae)) +(def-comparison-intrinsic >=-ssu? float-sse-pack ucomiss 3 "_mm_ucomige_ss" :tags (:ae)) +(def-comparison-intrinsic /=-ss? float-sse-pack comiss 3 "_mm_comineq_ss" :commutative t :tags (:ne)) +(def-comparison-intrinsic /=-ssu? float-sse-pack ucomiss 3 "_mm_ucomineq_ss" :commutative t :tags (:ne)) ;; Misc @@ -338,6 +349,19 @@ (def-binary-intrinsic cmpunord-sd double-sse-pack cmpsd 3 "_mm_cmpunord_sd" :tags (:unord)) (def-binary-intrinsic cmpunord-pd double-sse-pack cmppd 3 "_mm_cmpunord_pd" :tags (:unord) :commutative t) +(def-comparison-intrinsic =-sd? double-sse-pack comisd 3 "_mm_comieq_sd" :commutative t :tags (:e)) +(def-comparison-intrinsic =-sdu? double-sse-pack ucomisd 3 "_mm_ucomieq_sd" :commutative t :tags (:e)) +(def-comparison-intrinsic <-sd? double-sse-pack comisd 3 "_mm_comilt_sd" :tags (:b)) +(def-comparison-intrinsic <-sdu? double-sse-pack ucomisd 3 "_mm_ucomilt_sd" :tags (:b)) +(def-comparison-intrinsic <=-sd? double-sse-pack comisd 3 "_mm_comile_sd" :tags (:be)) +(def-comparison-intrinsic <=-sdu? double-sse-pack ucomisd 3 "_mm_ucomile_sd" :tags (:be)) +(def-comparison-intrinsic >-sd? double-sse-pack comisd 3 "_mm_comigt_sd" :tags (:a)) +(def-comparison-intrinsic >-sdu? double-sse-pack ucomisd 3 "_mm_ucomigt_sd" :tags (:a)) +(def-comparison-intrinsic >=-sd? double-sse-pack comisd 3 "_mm_comige_sd" :tags (:ae)) +(def-comparison-intrinsic >=-sdu? double-sse-pack ucomisd 3 "_mm_ucomige_sd" :tags (:ae)) +(def-comparison-intrinsic /=-sd? double-sse-pack comisd 3 "_mm_comineq_sd" :commutative t :tags (:ne)) +(def-comparison-intrinsic /=-sdu? double-sse-pack ucomisd 3 "_mm_ucomineq_sd" :commutative t :tags (:ne)) + ;; Misc (def-binary-intrinsic unpackhi-pd double-sse-pack unpckhpd 1 "_mm_unpackhi_pd") @@ -541,23 +565,31 @@ (def-unary-intrinsic slli-pi int-sse-pack pslldq 1 "_mm_slli_si128" :partial :one-arg :immediate-arg (unsigned-byte 8)) -(def-sse-int-intrinsic slli-pi16 fixnum int-sse-pack psllw 3 "_mm_slli_epi16" :make-temporary t) -(def-sse-int-intrinsic slli-pi32 fixnum int-sse-pack pslld 3 "_mm_slli_epi32" :make-temporary t) -(def-sse-int-intrinsic slli-pi64 fixnum int-sse-pack psllq 3 "_mm_slli_epi64" :make-temporary t) +(def-sse-int-intrinsic slli-pi16 fixnum int-sse-pack psllw 3 "_mm_slli_epi16" :make-temporary t + :defun-body "_mm_sll_epi16(#0,_mm_cvtsi32_si128(#1))") +(def-sse-int-intrinsic slli-pi32 fixnum int-sse-pack pslld 3 "_mm_slli_epi32" :make-temporary t + :defun-body "_mm_sll_epi32(#0,_mm_cvtsi32_si128(#1))") +(def-sse-int-intrinsic slli-pi64 fixnum int-sse-pack psllq 3 "_mm_slli_epi64" :make-temporary t + :defun-body "_mm_sll_epi64(#0,_mm_cvtsi32_si128(#1))") (def-binary-intrinsic sll-pi16 int-sse-pack psllw 1 "_mm_sll_epi16") (def-binary-intrinsic sll-pi32 int-sse-pack pslld 1 "_mm_sll_epi32") (def-binary-intrinsic sll-pi64 int-sse-pack psllq 1 "_mm_sll_epi64") -(def-sse-int-intrinsic srai-pi16 fixnum int-sse-pack psraw 3 "_mm_srai_epi16" :make-temporary t) -(def-sse-int-intrinsic srai-pi32 fixnum int-sse-pack psrad 3 "_mm_srai_epi32" :make-temporary t) +(def-sse-int-intrinsic srai-pi16 fixnum int-sse-pack psraw 3 "_mm_srai_epi16" :make-temporary t + :defun-body "_mm_sra_epi16(#0,_mm_cvtsi32_si128(#1))") +(def-sse-int-intrinsic srai-pi32 fixnum int-sse-pack psrad 3 "_mm_srai_epi32" :make-temporary t + :defun-body "_mm_sra_epi32(#0,_mm_cvtsi32_si128(#1))") (def-binary-intrinsic sra-pi16 int-sse-pack psraw 1 "_mm_sra_epi16") (def-binary-intrinsic sra-pi32 int-sse-pack psrad 1 "_mm_sra_epi32") (def-unary-intrinsic srli-pi int-sse-pack psrldq 1 "_mm_srli_si128" :partial :one-arg :immediate-arg (unsigned-byte 8)) -(def-sse-int-intrinsic srli-pi16 fixnum int-sse-pack psrlw 3 "_mm_srli_epi16" :make-temporary t) -(def-sse-int-intrinsic srli-pi32 fixnum int-sse-pack psrld 3 "_mm_srli_epi32" :make-temporary t) -(def-sse-int-intrinsic srli-pi64 fixnum int-sse-pack psrlq 3 "_mm_srli_epi64" :make-temporary t) +(def-sse-int-intrinsic srli-pi16 fixnum int-sse-pack psrlw 3 "_mm_srli_epi16" :make-temporary t + :defun-body "_mm_srl_epi16(#0,_mm_cvtsi32_si128(#1))") +(def-sse-int-intrinsic srli-pi32 fixnum int-sse-pack psrld 3 "_mm_srli_epi32" :make-temporary t + :defun-body "_mm_srl_epi32(#0,_mm_cvtsi32_si128(#1))") +(def-sse-int-intrinsic srli-pi64 fixnum int-sse-pack psrlq 3 "_mm_srli_epi64" :make-temporary t + :defun-body "_mm_srl_epi64(#0,_mm_cvtsi32_si128(#1))") (def-binary-intrinsic srl-pi16 int-sse-pack psrlw 1 "_mm_srl_epi16") (def-binary-intrinsic srl-pi32 int-sse-pack psrld 1 "_mm_srl_epi32") (def-binary-intrinsic srl-pi64 int-sse-pack psrlq 1 "_mm_srl_epi64") diff --git a/contrib/cl-simd/sse-package.lisp b/contrib/cl-simd/sse-package.lisp index c87af2f81..c77027b40 100644 --- a/contrib/cl-simd/sse-package.lisp +++ b/contrib/cl-simd/sse-package.lisp @@ -30,8 +30,10 @@ #:GIVE-UP-IR1-TRANSFORM #:ABORT-IR1-TRANSFORM #:INSERT-ARRAY-BOUNDS-CHECKS #:VECTOR-LENGTH #:ASSERT-ARRAY-RANK #:ASSERT-LVAR-TYPE - #:CONSTANT-LVAR-P #:LVAR-VALUE #:LVAR-TYPE - #:LEXENV-POLICY #:NODE-LEXENV + #:CONSTANT-LVAR-P #:LVAR-VALUE #:LVAR-TYPE #:LVAR-USES + #:LVAR-FUN-NAME #:BASIC-COMBINATION-FUN + #:LEXENV-POLICY #:NODE-LEXENV #:POLICY + #:CAST-P #:CAST-VALUE #:DELETE-FILTER #:FIND-SAETP #:FIND-SAETP-BY-CTYPE) #+sbcl (:import-from #:SB-IMPL