mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2025-12-06 10:41:33 -08:00
398 lines
15 KiB
Common Lisp
398 lines
15 KiB
Common Lisp
;;; -*- mode: Lisp; indent-tabs-mode: nil; -*-
|
|
;;;
|
|
;;; Copyright (c) 2010, Alexander Gavrilov (angavrilov@gmail.com)
|
|
;;;
|
|
;;; This file defines some extensions to the base intrinsic set,
|
|
;;; and other utility functions.
|
|
;;;
|
|
|
|
(in-package #:SSE)
|
|
|
|
;;; Helper macros and functions
|
|
(eval-when (:compile-toplevel :load-toplevel :execute)
|
|
;; Try using a matching inverse function name
|
|
(defun lookup-flip (arg pairs &key no-reverse)
|
|
(and (consp arg)
|
|
(let ((fix (or (cdr (assoc (first arg) pairs))
|
|
(unless no-reverse
|
|
(car (rassoc (first arg) pairs))))))
|
|
(cond ((eq fix :identity)
|
|
(assert (null (cddr arg)))
|
|
(second arg))
|
|
(fix
|
|
`(,fix ,@(rest arg)))
|
|
(t nil)))))
|
|
;; Macroexpand, plus compiler expand some specific names
|
|
(defun expand-condition (form env)
|
|
(setq form (macroexpand form env))
|
|
(loop while (and (consp form)
|
|
(symbolp (first form))
|
|
(get (first form) 'expand-in-condition))
|
|
do (setq form (c::cmp-expand-macro (compiler-macro-function (first form))
|
|
form env)))
|
|
form)
|
|
;; Checks if the form is an unary call
|
|
(defun is-unary? (form op)
|
|
(and (consp form)
|
|
(eq (first form) op)
|
|
(null (cddr form))))
|
|
;; IF-style function expander
|
|
(defun expand-if-macro (condition then-value else-value env if-f not-f or-f and-f andnot-f type-name zero-val &key flip)
|
|
(let* ((condition (expand-condition condition env))
|
|
(then-value (macroexpand then-value env))
|
|
(else-value (macroexpand else-value env))
|
|
(then-zero? (equal then-value zero-val))
|
|
(else-zero? (equal else-value zero-val)))
|
|
(cond ((is-unary? condition not-f)
|
|
(expand-if-macro (second condition) else-value then-value
|
|
env if-f not-f or-f and-f andnot-f type-name zero-val
|
|
:flip (not flip)))
|
|
((and then-zero? else-zero?)
|
|
zero-val)
|
|
(then-zero?
|
|
`(,andnot-f ,condition ,else-value))
|
|
(else-zero?
|
|
`(,and-f ,condition ,then-value))
|
|
(t
|
|
(let* ((csym (gensym))
|
|
(args `((,and-f ,csym ,then-value)
|
|
(,andnot-f ,csym ,else-value))))
|
|
`(let ((,csym ,condition))
|
|
(declare (type ,type-name ,csym)
|
|
(:read-only ,csym))
|
|
(,or-f ,@(if flip (reverse args) args)))))))))
|
|
|
|
(defmacro def-utility (name arg-types ret-type expansion &key expand-args expand-in-condition)
|
|
"Defines and exports a function & compiler macro with the specified expansion."
|
|
(let* ((anames (mapcar #'make-arg-name (make-arg-nums arg-types))))
|
|
`(progn
|
|
(export ',name)
|
|
(eval-when (:compile-toplevel :load-toplevel)
|
|
,@(if expand-in-condition
|
|
`((setf (get ',name 'expand-in-condition) t)))
|
|
(define-compiler-macro ,name (&environment env ,@anames)
|
|
(declare (ignorable env))
|
|
,@(loop for arg in (if (eq expand-args t) anames expand-args)
|
|
collect `(setq ,arg (macroexpand ,arg env)))
|
|
,expansion))
|
|
(proclaim '(ftype (function ,(mapcar #'declaim-arg-type-of arg-types) ,ret-type) ,name))
|
|
(defun ,name ,anames
|
|
(declare (optimize (speed 0) (debug 0) (safety 1)))
|
|
(let ,(mapcar #'list anames anames)
|
|
(declare ,@(loop for an in anames and at in arg-types
|
|
collect `(type ,at ,an)))
|
|
;; Depends on the compiler macro being expanded:
|
|
(,name ,@anames))))))
|
|
|
|
(defmacro def-if-function (name type-name postfix)
|
|
`(def-utility ,name (,type-name ,type-name ,type-name) ,type-name
|
|
(expand-if-macro arg0 arg1 arg2 env
|
|
',name
|
|
',(intern (format nil "NOT-~A" postfix))
|
|
',(intern (format nil "OR-~A" postfix))
|
|
',(intern (format nil "AND-~A" postfix))
|
|
',(intern (format nil "ANDNOT-~A" postfix))
|
|
',type-name
|
|
'(,(intern (format nil "SETZERO-~A" postfix))))))
|
|
|
|
;;; Aligned array allocation
|
|
|
|
(deftype sse-array (elt-type &optional dims)
|
|
"Type of arrays efficiently accessed by SSE aref intrinsics and returned by make-sse-array.
|
|
Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY is allowed."
|
|
(when (eq elt-type '*)
|
|
(c::cmperr "SSE-ARRAY must have a specific element type."))
|
|
(let ((upgraded (upgraded-array-element-type elt-type)))
|
|
(when (member upgraded '(t bit))
|
|
(c::cmperr "Invalid SSE-ARRAY element type: ~S" elt-type))
|
|
(unless (subtypep upgraded elt-type)
|
|
(c::cmpwarn "SSE-ARRAY element type ~S has been upgraded to ~S" elt-type upgraded))
|
|
`(array ,upgraded ,dims)))
|
|
|
|
(defun make-sse-array (dimensions &rest args &key (element-type '(unsigned-byte 8)) displaced-to &allow-other-keys)
|
|
"Allocates an SSE-ARRAY aligned to the 16-byte boundary. May flatten displacement chains for performance reasons."
|
|
(if displaced-to
|
|
(apply #'make-array dimensions args)
|
|
(multiple-value-bind (elt-size adj-type)
|
|
(array-element-type-byte-size element-type)
|
|
(when (eq adj-type t)
|
|
(error "Cannot use element type T with SSE."))
|
|
(sys::remf args :element-type)
|
|
(let* ((full-size (if (numberp dimensions)
|
|
dimensions
|
|
(reduce #'* dimensions)))
|
|
(padded-size (+ full-size (ceiling 15 elt-size)))
|
|
(array (apply #'make-array padded-size :element-type adj-type args))
|
|
(misalign (ffi:c-inline (array) (:object) :int
|
|
"(((unsigned long)(#0)->array.self.b8) & 15)"
|
|
:one-liner t))
|
|
(offset (/ (if (> misalign 0) (- 16 misalign) 0) elt-size)))
|
|
(make-array dimensions :element-type element-type
|
|
:displaced-to array :displaced-index-offset offset)))))
|
|
|
|
;;; Single-float tools
|
|
|
|
;; Constants
|
|
|
|
(defmacro set-true-ss ()
|
|
(load-time-value (make-pack-of-bin #xFFFFFFFF :as 'float-sse-pack)))
|
|
|
|
(defmacro set-true-ps ()
|
|
(load-time-value (make-pack-of-bin -1 :as 'float-sse-pack)))
|
|
|
|
(eval-when (:compile-toplevel :load-toplevel)
|
|
(define-symbol-macro 0.0-ps (setzero-ps))
|
|
|
|
(define-symbol-macro true-ss (set-true-ss))
|
|
(define-symbol-macro false-ss (setzero-ps))
|
|
|
|
(define-symbol-macro true-ps (set-true-ps))
|
|
(define-symbol-macro false-ps (setzero-ps)))
|
|
|
|
;; Bitwise if
|
|
|
|
(def-if-function if-ps float-sse-pack #:ps)
|
|
|
|
;; Arithmetic negation (xor with negative zero)
|
|
|
|
(def-utility neg-ss (float-sse-pack) float-sse-pack
|
|
`(xor-ps ,arg0 ,(load-time-value (make-pack-of-bin #x80000000 :as 'float-sse-pack))))
|
|
|
|
(def-utility neg-ps (float-sse-pack) float-sse-pack
|
|
`(xor-ps ,arg0 ,(load-time-value
|
|
(make-pack-of-bin #x80000000800000008000000080000000 :as 'float-sse-pack))))
|
|
|
|
;; Logical inversion
|
|
|
|
(def-utility not-ps (float-sse-pack) float-sse-pack
|
|
(or (lookup-flip arg0 '((=-ps . /=-ps)
|
|
(<-ps . /<-ps)
|
|
(<=-ps . /<=-ps)
|
|
(>-ps . />-ps)
|
|
(>=-ps . />=-ps)
|
|
(cmpord-ps . cmpunord-ps)
|
|
(not-ps . :identity)))
|
|
`(xor-ps ,arg0 true-ps))
|
|
:expand-args t)
|
|
|
|
;; Shuffle
|
|
|
|
(defun shuffle-ps (x y mask)
|
|
(declare (optimize (speed 0) (debug 0) (safety 1))
|
|
(type t x y mask))
|
|
(check-type x sse-pack)
|
|
(check-type y sse-pack)
|
|
(check-type mask (unsigned-byte 8))
|
|
(ffi:c-inline (x y mask) (:object :object :int) :float-sse-pack
|
|
"_mm_setr_ps(
|
|
(#0)->sse.data.sf[(#2)&3],
|
|
(#0)->sse.data.sf[((#2)>>2)&3],
|
|
(#1)->sse.data.sf[((#2)>>4)&3],
|
|
(#1)->sse.data.sf[((#2)>>6)&3]
|
|
)" :one-liner t))
|
|
|
|
;;; Double-float tools
|
|
|
|
;; Constants
|
|
|
|
(defmacro set-true-sd ()
|
|
(load-time-value (make-pack-of-bin #xFFFFFFFFFFFFFFFF :as 'double-sse-pack)))
|
|
|
|
(defmacro set-true-pd ()
|
|
(load-time-value (make-pack-of-bin -1 :as 'double-sse-pack)))
|
|
|
|
(eval-when (:compile-toplevel :load-toplevel)
|
|
(define-symbol-macro 0.0-pd (setzero-pd))
|
|
|
|
(define-symbol-macro true-sd (set-true-sd))
|
|
(define-symbol-macro false-sd (setzero-pd))
|
|
|
|
(define-symbol-macro true-pd (set-true-pd))
|
|
(define-symbol-macro false-pd (setzero-pd)))
|
|
|
|
;; Bitwise if
|
|
|
|
(def-if-function if-pd double-sse-pack #:pd)
|
|
|
|
;; Arithmetic negation (xor with negative zero)
|
|
|
|
(def-utility neg-sd (double-sse-pack) double-sse-pack
|
|
`(xor-pd ,arg0
|
|
,(load-time-value
|
|
(make-pack-of-bin #x8000000000000000 :as 'double-sse-pack))))
|
|
|
|
(def-utility neg-pd (double-sse-pack) double-sse-pack
|
|
`(xor-pd ,arg0
|
|
,(load-time-value
|
|
(make-pack-of-bin #x80000000000000008000000000000000 :as 'double-sse-pack))))
|
|
|
|
;; Logical inversion
|
|
|
|
(def-utility not-pd (double-sse-pack) double-sse-pack
|
|
(or (lookup-flip arg0 '((=-pd . /=-pd)
|
|
(<-pd . /<-pd)
|
|
(<=-pd . /<=-pd)
|
|
(>-pd . />-pd)
|
|
(>=-pd . />=-pd)
|
|
(cmpord-pd . cmpunord-pd)
|
|
(not-pd . :identity)))
|
|
`(xor-pd ,arg0 true-pd))
|
|
:expand-args t)
|
|
|
|
;; Shuffle
|
|
|
|
(defun shuffle-pd (x y mask)
|
|
(declare (optimize (speed 0) (debug 0) (safety 1))
|
|
(type t x y mask))
|
|
(check-type x sse-pack)
|
|
(check-type y sse-pack)
|
|
(check-type mask (unsigned-byte 2))
|
|
(ffi:c-inline (x y mask) (:object :object :int) :double-sse-pack
|
|
"_mm_setr_pd(
|
|
(#0)->sse.data.df[(#2)&1],
|
|
(#1)->sse.data.df[((#2)>>1)&1]
|
|
)" :one-liner t))
|
|
|
|
;;; Integer tools
|
|
|
|
;; Constants
|
|
|
|
(defmacro set-true-pi ()
|
|
(load-time-value (make-pack-of-bin -1 :as 'int-sse-pack)))
|
|
|
|
(eval-when (:compile-toplevel :load-toplevel)
|
|
(define-symbol-macro 0-pi (setzero-pi))
|
|
|
|
(define-symbol-macro true-pi (set-true-pi))
|
|
(define-symbol-macro false-pi (setzero-pi)))
|
|
|
|
;; Bitwise if
|
|
|
|
(def-if-function if-pi float-sse-pack #:pi)
|
|
|
|
;; Arithmetic negation (subtract from 0)
|
|
|
|
(macrolet ((frob (name subf)
|
|
`(def-utility ,name (int-sse-pack) int-sse-pack
|
|
`(,',subf (setzero-pi) ,arg0))))
|
|
(frob neg-pi8 sub-pi8)
|
|
(frob neg-pi16 sub-pi16)
|
|
(frob neg-pi32 sub-pi32)
|
|
(frob neg-pi64 sub-pi64))
|
|
|
|
;; Logical inversion
|
|
|
|
(def-utility not-pi (int-sse-pack) int-sse-pack
|
|
(or (lookup-flip arg0 '((<=-pi8 . >-pi8)
|
|
(<=-pi16 . >-pi16)
|
|
(<=-pi32 . >-pi32)
|
|
(>=-pi8 . <-pi8)
|
|
(>=-pi16 . <-pi16)
|
|
(>=-pi32 . <-pi32)
|
|
(/=-pi8 . =-pi8)
|
|
(/=-pi16 . =-pi16)
|
|
(/=-pi32 . =-pi32)
|
|
(not-pi . :identity))
|
|
:no-reverse t)
|
|
`(xor-pi ,arg0 true-pi))
|
|
:expand-args t)
|
|
|
|
(macrolet ((frob (name code)
|
|
`(def-utility ,name (int-sse-pack int-sse-pack) int-sse-pack
|
|
,code
|
|
:expand-in-condition t)))
|
|
|
|
(frob <=-pi8 `(not-pi (>-pi8 ,arg0 ,arg1)))
|
|
(frob <=-pi16 `(not-pi (>-pi16 ,arg0 ,arg1)))
|
|
(frob <=-pi32 `(not-pi (>-pi32 ,arg0 ,arg1)))
|
|
|
|
(frob >=-pi8 `(not-pi (<-pi8 ,arg0 ,arg1)))
|
|
(frob >=-pi16 `(not-pi (<-pi16 ,arg0 ,arg1)))
|
|
(frob >=-pi32 `(not-pi (<-pi32 ,arg0 ,arg1)))
|
|
|
|
(frob /=-pi8 `(not-pi (=-pi8 ,arg0 ,arg1)))
|
|
(frob /=-pi16 `(not-pi (=-pi16 ,arg0 ,arg1)))
|
|
(frob /=-pi32 `(not-pi (=-pi32 ,arg0 ,arg1))))
|
|
|
|
;; Shifts
|
|
|
|
(defun slli-pi (x shift)
|
|
(declare (optimize (speed 0) (debug 0) (safety 1))
|
|
(type t x shift))
|
|
(check-type x sse-pack)
|
|
(check-type shift (unsigned-byte 8))
|
|
(ffi:c-inline (x shift) (:object :int) :object
|
|
"cl_object rv = ecl_make_int_sse_pack(_mm_setzero_si128());
|
|
unsigned bshift=(#1), i;
|
|
for (i = 0; i + bshift < 16; i++)
|
|
rv->sse.data.b8[i+bshift] = (#0)->sse.data.b8[i];
|
|
@(return) = rv;"))
|
|
|
|
(defun srli-pi (x shift)
|
|
(declare (optimize (speed 0) (debug 0) (safety 1))
|
|
(type t x shift))
|
|
(check-type x sse-pack)
|
|
(check-type shift (unsigned-byte 8))
|
|
(ffi:c-inline (x shift) (:object :int) :object
|
|
"cl_object rv = ecl_make_int_sse_pack(_mm_setzero_si128());
|
|
int bshift=(#1), i;
|
|
for (i = 16 - bshift - 1; i >= 0; i--)
|
|
rv->sse.data.b8[i] = (#0)->sse.data.b8[i+bshift];
|
|
@(return) = rv;"))
|
|
|
|
;; Extract & insert
|
|
|
|
(defun extract-pi16 (x index)
|
|
(declare (optimize (speed 0) (debug 0) (safety 1))
|
|
(type t x index))
|
|
(check-type x sse-pack)
|
|
(check-type index (unsigned-byte 8))
|
|
(ffi:c-inline (x index) (:object :int) :fixnum
|
|
"*((unsigned short*)&(#0)->sse.data.b8[((#1)&3)*2])"
|
|
:one-liner t))
|
|
|
|
(defun insert-pi16 (x ival index)
|
|
(declare (optimize (speed 0) (debug 0) (safety 1))
|
|
(type t x ival index))
|
|
(check-type x sse-pack)
|
|
(check-type index (unsigned-byte 8))
|
|
(ffi:c-inline (x ival index) (:int-sse-pack :int :int) :object
|
|
"cl_object rv = ecl_make_int_sse_pack(#0);
|
|
*((unsigned short*)&rv->sse.data.b8[((#2)&3)*2]) = (unsigned short)(#1);
|
|
@(return) = rv;"))
|
|
|
|
;; Shuffles
|
|
|
|
(defun shuffle-pi32 (x mask)
|
|
(declare (optimize (speed 0) (debug 0) (safety 1))
|
|
(type t x mask))
|
|
(check-type x sse-pack)
|
|
(check-type mask (unsigned-byte 8))
|
|
(ffi:c-inline (x mask) (:object :int) :int-sse-pack
|
|
"unsigned *pd = (unsigned*)(#0)->sse.data.b8;
|
|
@(return) = _mm_setr_epi32(pd[(#1)&3],pd[((#1)>>2)&3],pd[((#1)>>4)&3],pd[((#1)>>6)&3]);"))
|
|
|
|
(defun shufflelo-pi16 (x mask)
|
|
(declare (optimize (speed 0) (debug 0) (safety 1))
|
|
(type t x mask))
|
|
(check-type x sse-pack)
|
|
(check-type mask (unsigned-byte 8))
|
|
(ffi:c-inline (x mask) (:object :int) :int-sse-pack
|
|
"unsigned short *pd = (unsigned short*)(#0)->sse.data.b8;
|
|
@(return) = _mm_setr_epi16(
|
|
pd[(#1)&3],pd[((#1)>>2)&3],pd[((#1)>>4)&3],pd[(((#1)>>6)&3)],
|
|
pd[4], pd[5], pd[6], pd[7]
|
|
);"))
|
|
|
|
(defun shufflehi-pi16 (x mask)
|
|
(declare (optimize (speed 0) (debug 0) (safety 1))
|
|
(type t x mask))
|
|
(check-type x sse-pack)
|
|
(check-type mask (unsigned-byte 8))
|
|
(ffi:c-inline (x mask) (:object :int) :int-sse-pack
|
|
"unsigned short *pb = (unsigned short*)(#0)->sse.data.b8, *pd = pb+4;
|
|
@(return) = _mm_setr_epi16(
|
|
pb[0], pb[1], pb[2], pb[3],
|
|
pd[(#1)&3],pd[((#1)>>2)&3],pd[((#1)>>4)&3],pd[(((#1)>>6)&3)]
|
|
);"))
|
|
|