cmp: faster function calls for C compatible variadic dispatch

For C compatible variadic dispatch, the compiler now generates two
entrypoints for variadic functions. An entrypoint with specialized
signature that is used for direct C calls from the same file and an
entrypoint with generic signature that implements the variadic to
variadic dispatch, i.e. checking the number of arguments and then
calling the specialized entrypoint.

This approach is faster than using the wrapper functions in
variadic_dispatch_table. The reasons are threefold: we save a call to
ecl_process_env(), we don't need a call through a function pointer but
instead use a direct call to the specialized entrypoint and we emit
better code to deal with required arguments since the number of those
are known.

Moreover, for functions with optional arguments the new approach is
less stack hungry since we can allocate an array of size smaller than
ECL_C_ARGUMENTS_LIMIT to store the arguments.
This commit is contained in:
Marius Gerbershagen 2024-03-03 10:01:24 +01:00
parent bbc8b30478
commit dc286efb66
7 changed files with 122 additions and 55 deletions

View file

@ -73,7 +73,7 @@ Hint provided by Pascal J. Bourguignon.
* Cross-compile for the iOS platform (needs Xcode 11 or higher)
1. Build the host ECL
#+BEGIN_SRC shell-script
./configure --prefix=`pwd`/ecl-iOS-host --disable-c99complex
./configure CFLAGS="-DECL_C_COMPATIBLE_VARIADIC_DISPATCH" --prefix=`pwd`/ecl-iOS-host --disable-c99complex
make -j9
make install
rm -r build
@ -130,7 +130,7 @@ Emscripten target is a little fickle so keep in mind that:
1. Build the host ECL
#+begin_src shell-script
./configure ABI=32 CFLAGS="-m32 -g -O2" LDFLAGS="-m32 -g -O2" \
./configure ABI=32 CFLAGS="-m32 -g -O2 -DECL_C_COMPATIBLE_VARIADIC_DISPATCH" LDFLAGS="-m32 -g -O2" \
--prefix=`pwd`/ecl-emscripten-host --disable-threads
make -j16 && make install

View file

@ -55,8 +55,12 @@ ecl_make_cfun_va(cl_objectfn c_function, cl_object name, cl_object cblock, int n
cf = ecl_alloc_object(t_cfun);
#ifdef ECL_C_COMPATIBLE_VARIADIC_DISPATCH
cf->cfun.entry = variadic_dispatch_table[narg_fixed];
cf->cfun.entry_variadic = c_function;
if (narg_fixed == 0) {
cf->cfun.entry = c_function;
} else {
cf->cfun.entry = variadic_dispatch_table[narg_fixed];
cf->cfun.entry_variadic = c_function;
}
#else
cf->cfun.entry = c_function;
#endif
@ -82,8 +86,12 @@ ecl_make_cclosure_va(cl_objectfn c_function, cl_object env, cl_object block, int
cc = ecl_alloc_object(t_cclosure);
#ifdef ECL_C_COMPATIBLE_VARIADIC_DISPATCH
cc->cclosure.entry = variadic_dispatch_table[narg_fixed];
cc->cclosure.entry_variadic = c_function;
if (narg_fixed == 0) {
cc->cclosure.entry = c_function;
} else {
cc->cclosure.entry = variadic_dispatch_table[narg_fixed];
cc->cclosure.entry_variadic = c_function;
}
#else
cc->cclosure.entry = c_function;
#endif

View file

@ -115,6 +115,9 @@ ecl_def_string_array(feature_names,static,const) = {
#endif
#ifdef ECL_WSOCK
ecl_def_string_array_elt("WSOCK"),
#endif
#ifdef ECL_C_COMPATIBLE_VARIADIC_DISPATCH
ecl_def_string_array_elt("C-COMPATIBLE-VARIADIC-DISPATCH"),
#endif
ecl_def_string_array_elt(0)
};

View file

@ -83,26 +83,27 @@
(defun c2lambda-expr
(lambda-list body cfun fname description use-narg required-lcls closure-type
optional-type-check-forms keyword-type-check-forms
&aux (requireds (first lambda-list))
(optionals (second lambda-list))
(rest (third lambda-list)) rest-loc
(key-flag (fourth lambda-list))
(keywords (fifth lambda-list))
(allow-other-keys (sixth lambda-list))
(nreq (length requireds))
(nopt (/ (length optionals) 3))
(nkey (/ (length keywords) 4))
(varargs (or optionals rest key-flag allow-other-keys))
(fname-in-ihs-p (or (policy-debug-variable-bindings)
(and (policy-debug-ihs-frame)
(or description fname))))
simple-varargs
(*permanent-data* t)
(*unwind-exit* *unwind-exit*)
(*env* *env*)
(*inline-blocks* 0)
(last-arg))
optional-type-check-forms keyword-type-check-forms
variadic-entrypoint
&aux (requireds (first lambda-list))
(optionals (second lambda-list))
(rest (third lambda-list)) rest-loc
(key-flag (fourth lambda-list))
(keywords (fifth lambda-list))
(allow-other-keys (sixth lambda-list))
(nreq (length requireds))
(nopt (/ (length optionals) 3))
(nkey (/ (length keywords) 4))
(varargs (or optionals rest key-flag allow-other-keys))
(fname-in-ihs-p (or (policy-debug-variable-bindings)
(and (policy-debug-ihs-frame)
(or description fname))))
simple-varargs
(*permanent-data* t)
(*unwind-exit* *unwind-exit*)
(*env* *env*)
(*inline-blocks* 0)
(last-arg))
(declare (fixnum nreq nkey))
(if (and fname ;; named function
@ -118,7 +119,11 @@
;; check number of arguments
(wt-maybe-check-num-arguments use-narg
nreq
(if variadic-entrypoint
nil ; minimum number of arguments
; already checked in the
; varargs entrypoint
nreq)
(if (or rest key-flag allow-other-keys)
nil
(+ nreq nopt))
@ -303,11 +308,11 @@
(if fname
(wt " FEwrong_num_arguments(" (get-object fname) ");")
(wt " FEwrong_num_arguments_anonym();"))))
(if (and maxarg (= minarg maxarg))
(if (and minarg maxarg (= minarg maxarg))
(progn (wt-nl "if (ecl_unlikely(narg!=" minarg "))")
(wrong-num-arguments))
(progn
(when (plusp minarg)
(when (and minarg (plusp minarg))
(wt-nl "if (ecl_unlikely(narg<" minarg "))")
(wrong-num-arguments))
(when maxarg

View file

@ -115,7 +115,7 @@
(when fname
(wt-comment fname)))
(defun wt-call-normal (fun args type)
(defun wt-call-normal (fun args type &optional (narg (length args)))
(declare (ignore type))
(unless (fun-cfun fun)
(baboon "Function without a C name: ~A" (fun-name fun)))
@ -123,7 +123,6 @@
(maxarg (fun-maxarg fun))
(fun-c-name (fun-cfun fun))
(fun-lisp-name (fun-name fun))
(narg (length args))
(env nil))
(case (fun-closure fun)
(CLOSURE
@ -135,7 +134,7 @@
(let* ((j (- lex-lvl n 1))
(x (lex-env-var-name j)))
(push x args))))))
(unless (<= minarg narg maxarg)
(when (not (<= minarg narg maxarg))
(cmperr "Wrong number of arguments for function ~S"
(or fun-lisp-name 'ANONYMOUS)))
(when (fun-needs-narg fun)
@ -180,16 +179,18 @@
(format nil "ecl_nthcdr(~D,~A)" (- *env* expected-env-size) env-var)
env-var)))
(defun wt-make-closure (fun &aux (cfun (fun-cfun fun))
(variadic-entrypoint (fun-variadic-entrypoint fun)))
(defun wt-make-closure (fun)
(declare (type fun fun))
(let* ((closure (fun-closure fun))
(narg (fun-fixed-narg fun))
(narg-fixed (min (fun-minarg fun) si:c-arguments-limit)))
(variadic-entrypoint (fun-variadic-entrypoint fun))
(cfun (fun-cfun fun))
(entrypoint (or variadic-entrypoint cfun))
(narg-fixed (if variadic-entrypoint
0
(min (fun-minarg fun) si:c-arguments-limit))))
(cond ((eq closure 'CLOSURE)
(wt "ecl_make_cclosure_va((cl_objectfn)" cfun ","
(environment-accessor fun)
",Cblock," (min (fun-minarg fun) si:c-arguments-limit) ")"))
(wt "ecl_make_cclosure_va((cl_objectfn)" entrypoint "," (environment-accessor fun) ",Cblock," narg-fixed ")"))
((eq closure 'LEXICAL)
(baboon :format-control "wt-make-closure: lexical closure detected."))
(narg ; empty environment fixed number of args
@ -198,8 +199,7 @@
"ecl_make_cfun((cl_objectfn_fixed)")
entrypoint ",ECL_NIL,Cblock," narg-fixed ")"))
(t ; empty environment variable number of args
(wt "ecl_make_cfun_va((cl_objectfn)" cfun ",ECL_NIL,Cblock,"
(min (fun-minarg fun) si:c-arguments-limit) ")")))))
(wt "ecl_make_cfun_va((cl_objectfn)" entrypoint ",ECL_NIL,Cblock," narg-fixed ")")))))
;;;
;;; COERCE-LOC

View file

@ -351,7 +351,8 @@
(fun-required-lcls fun)
(fun-closure fun)
(fun-optional-type-check-forms fun)
(fun-keyword-type-check-forms fun))))
(fun-keyword-type-check-forms fun)
(fun-variadic-entrypoint fun))))
string))
;;; Variadic entrypoints
@ -364,6 +365,19 @@
;;; C calls exist, the C compiler will usually inline the fixed
;;; entrypoint and thus optimize away the overhead that this method
;;; incurs over only generating a variadic entrypoint.
;;;
;;; If we use C compatible variadic dispatch, we can likewise generate
;;; an entrypoint with the signature expected by the caller, i.e. a
;;; function with only the narg argument as a fixed parameter and all
;;; other arguments as variadic parameters. This function then checks
;;; the number of arguments and dispatches to the entrypoint where the
;;; required arguments appear as fixed parameters. The advantage over
;;; only generating a variadic entrypoint with generic signature is
;;; again that callers using direct C calls can skip some work.
;;; Typically, C compatible variadic dispatch is used when fixed
;;; arguments can be passed in registers while variadic arguments must
;;; be pushed onto the stack. Thus, direct C calls can skip pushing
;;; the required arguments onto the stack.
(defun fun-variadic-entrypoint (fun)
(let ((result (fun-variadic-entrypoint-cfun fun)))
(if (not (eq result :unknown))
@ -371,22 +385,57 @@
(setf (fun-variadic-entrypoint-cfun fun)
(and (policy-inline-nargs-check)
(not (fun-no-entry fun))
(not (fun-needs-narg fun))
(or (not (fun-needs-narg fun))
#+c-compatible-variadic-dispatch t)
;; lexical closures will never be called via a
;; variadic entrypoint, no need to create one
(not (eq (fun-closure fun) 'lexical))
(concatenate 'string (fun-cfun fun) "_va"))))))
(defun t3function-variadic-entrypoint (fun)
(t3function-header fun (fun-variadic-entrypoint-cfun fun) t)
(wt-nl-open-brace)
(wt-maybe-check-num-arguments t (fun-minarg fun) (fun-maxarg fun) (fun-name fun))
(wt-nl "return ")
(wt-call (fun-cfun fun) (fun-required-lcls fun))
(wt ";")
(wt-nl-close-many-braces 0))
(flet ((wt-return (args)
(wt-nl "return ")
(wt-call (fun-cfun fun) args)
(wt ";")
(wt-nl-close-many-braces 0)))
(t3function-header fun
(fun-variadic-entrypoint-cfun fun)
t
#+c-compatible-variadic-dispatch t)
(wt-nl-open-brace)
(wt-maybe-check-num-arguments t
(fun-minarg fun)
(fun-maxarg fun)
(fun-name fun))
#-c-compatible-variadic-dispatch
(wt-return (fun-required-lcls fun))
#+c-compatible-variadic-dispatch
(let ((maxargs (min (fun-maxarg fun) (1+ si:c-arguments-limit))))
;; For C compatible variadic dispatch, we handle both functions
;; with variadic and with fixed number of arguments
(wt-nl "cl_object x[" maxargs "];")
(wt-nl "va_list args; va_start(args,narg);")
(loop for i below (fun-minarg fun)
do (wt-nl "x[" i "] = ") (wt-coerce-loc :object 'VA-ARG) (wt ";"))
(unless (= (fun-minarg fun) (fun-maxarg fun))
(wt-nl "for (int i = " (fun-minarg fun) "; i < ")
(if (<= maxargs si:c-arguments-limit)
(wt "narg")
(wt "(narg < " maxargs " ? narg : " maxargs ")"))
(wt "; i++)")
(wt-open-brace)
(wt-nl "x[i] = ")
(wt-coerce-loc :object 'VA-ARG)
(wt ";")
(wt-nl-close-brace))
(wt-nl "va_end(args);")
(let ((args (loop for i below maxargs
collect (concatenate 'string "x[" (write-to-string i) "]"))))
(when (fun-needs-narg fun)
(push "narg" args))
(wt-return args)))))
(defun t3function-header (fun cfun needs-narg)
(defun t3function-header (fun cfun needs-narg &optional omit-requireds)
(let* ((comma "")
(lambda-expr (fun-lambda fun))
(lambda-list (c1form-arg 0 lambda-expr))
@ -394,6 +443,7 @@
repeat si::c-arguments-limit
for arg in (car lambda-list)
collect (next-lcl (var-name arg)))))
(setf (fun-required-lcls fun) requireds)
(cond ((fun-exported fun)
(wt-nl-h "ECL_DLLEXPORT cl_object " cfun "(")
(wt-nl "cl_object " cfun "("))
@ -408,10 +458,11 @@
(wt-h comma "volatile cl_object *")
(wt comma "volatile cl_object *lex" n)
(setf comma ", "))
(loop for lcl in (setf (fun-required-lcls fun) requireds)
do (wt-h comma "cl_object " *volatile*)
(wt comma "cl_object " *volatile* lcl)
(setf comma ", "))
(unless omit-requireds
(loop for lcl in requireds
do (wt-h comma "cl_object " *volatile*)
(wt comma "cl_object " *volatile* lcl)
(setf comma ", ")))
(when needs-narg
(wt-h ", ...")
(wt ", ..."))

View file

@ -167,7 +167,7 @@ The cross-compilation steps for iOS are similar to those for Android.
Build the host ECL:
@example
@verbatim
./configure --prefix=`pwd`/ecl-iOS-host --disable-c99complex
./configure CFLAGS="-DECL_C_COMPATIBLE_VARIADIC_DISPATCH" --prefix=`pwd`/ecl-iOS-host --disable-c99complex
make -j9
make install
rm -r build