Merge branch 'drop-msvc' into 'develop'

Drop support for Microsoft Visual Studio Compiler

Closes #809

See merge request embeddable-common-lisp/ecl!369
This commit is contained in:
Daniel Kochmański 2026-04-19 17:51:19 +00:00
commit e1bf9c2f36
140 changed files with 24 additions and 117564 deletions

View file

@ -32,6 +32,8 @@
- bugfix: MAKE-PACKAGE destructively modified defining form conses of the
package local nicknames breaking bytecmp on such packages (#839)
- Support for Microsoft Visual Studio compilers has been dropped
* 26.3.27 changes since 24.5.10
- Vendored libffi has been updated to the version 3.5.2

11
INSTALL
View file

@ -22,17 +22,6 @@ preceded with the architecture specification:
make install
#+end_src
* Windows with Visual Studio C++ 2008
1. Open the Visual Studio x86 or x64 native tools command prompt
2. Enter the msvc directory
3. Read the file Makefile to find the configuration options. They
typically have the form ECL_UNICODE=1, ECL_THREADS=1, etc
4. Enter
nmake <options>
where <options> is a set of zero or more of those options
5. Use "nmake install" to create a directory called "package" with ECL in it.
6. Move that directory wherever you need.
* Cross-compile for Windows64 using MinGW
1. Install MinGW64 toolchain, for example:
#+begin_src shell-scrip

View file

@ -1,31 +0,0 @@
# AppVeyor configuration that builds and installs ECL with nmake from the
# msvc directory of the clone.
os: Visual Studio 2015
# Target platforms and build configurations requested from the CI service.
platform:
- amd64
- x86
configuration:
- Debug
- Release
# clone directory
clone_folder: c:\build
build_script:
- echo build_script
- cd c:\build\msvc
# Map the selected configuration/platform onto the ECL_DEBUG and ECL_WIN64
# options that are passed to nmake below.
- if "%configuration%" == "Debug" set DEBUGFLAGS="ECL_DEBUG=1"
- if "%platform%" == "amd64" set PLATFORMFLAGS="ECL_WIN64=1"
# Run vcvarsall.bat for the selected platform before invoking nmake.
- call "%VS140COMNTOOLS%\..\..\VC\vcvarsall.bat" %platform%
# Build the default target, then the install target, with the same options.
- nmake /f Makefile %DEBUGFLAGS% %PLATFORMFLAGS%
- nmake /f Makefile install %DEBUGFLAGS% %PLATFORMFLAGS%
# The installer (windows-nsi) step and artifact upload are disabled.
#after_build:
# - cd c:\build\msvc
# - nmake /f Makefile windows-nsi
#artifacts:
# - path: ecl.zip
# name: ECL
deploy: off

View file

@ -1,528 +0,0 @@
#
# Makefile for ECoLisp
#
top_srcdir= ..\src
srcdir = ..\src
# =============================================================
# ECL configuration
# <BEGIN>
SHORT_SITE_NAME =
LONG_SITE_NAME =
ECL_VERSION = 24.5.10
ECL_VERSION_NUMBER= 240510
ARCHITECTURE = PENTIUM4
SOFTWARE_TYPE = NT
SOFTWARE_VERSION = 5.0
THEHOST = win32
# Symbols to add to *FEATURES* in the final executable
LSP_FEATURES = :ecl :common :common-lisp :ansi-cl :ffi :prefixed-api :cdr-14 :package-local-nicknames :clos :ecl-pde :long-float :ieee-floating-point :floating-point-exceptions :boehm-gc :dlopen :msvc :windows :win32 :cmu-format :clos-streams :uint8-t :uint16-t :uint32-t :uint64-t :long-long :little-endian :ecl-weak-hash
# Size of the C stack in bytes
ECL_DEFAULT_C_STACK_SIZE = 1048576
# Define to 1 to make a 64-bit build
# ECL_WIN64 =
# Define here the processor type to compile GMP library
# with maximum optimization. Possible values are:
# gc -> generic implementation
# p0 -> Pentium processor
# p3 -> Pentium III processor
# p4 -> Pentium IV processor
# amd64 -> X86_64 compatible processor
!if "$(GMP_TYPE)" == ""
GMP_TYPE = gc
!endif
# Set it to non-empty to include Win32 thread support
# Currently it is NOT SUPPORTED to build ECL without threads. The reason
# is that certain exception handlers in Windows always use new threads.
# Without them, ECL would be an even more fragile piece of software.
ECL_THREADS = 1
# Set it to empty to remove support for Unicode characters
ECL_UNICODE = 1
# Set it to non-empty to enable Win32 debug support
#ECL_DEBUG = 1
# Set it to non-empty to support SSE2 intrinsics
ECL_SSE =
# Add the extensions to include in the build process. Comment any
# of the following lines to remove a feature from the build process
# LISP->C compiled
ECL_CMP =
# ASDF support
ECL_ASDF =
# TCP support
ECL_SOCKETS =
# Regression Tests support
ECL_RT =
# Defsystem support
ECL_DEFSYS =
# Profiling
ECL_PROFILE =
# Use the DbgHelp.lib shared library to provide C Backtrace support
ECL_USE_DBGHELP = $(ECL_DEBUG)
!if "$(YASM)" == ""
!if "$(ECL_WIN64)" != ""
YASM=yasm-1.3.0-win64.exe
!else
YASM=yasm-1.3.0-win32.exe
!endif
!endif
# <END> (ECL configuration)
# =============================================================
TAR_DIR = %CD%\ecl-$(ECL_VERSION)
# Programs used by "make":
#
CC = cl
CLIBS = user32.lib ws2_32.lib shell32.lib
STATICLIBS = eclgc.lib eclgmp.lib
LIBS = $(STATICLIBS) $(CLIBS)
RM = del
RMDIR = rmdir /Q /S
MKDIR = mkdir
EXE = .exe
CP = copy /Y
MV = move /Y
MSDEV = msdev
MKNSI = makensis.exe
# ==================== Flags ====================
!if "$(ECL_WIN64)" != ""
GMP_BITS=64
GC_CPU=AMD64
ARCHITECTURE=X86-64
VCREDIST=vcredist_x64.exe
!else
GMP_BITS=32
GC_CPU=i386
ARCHITECTURE=PENTIUM4
VCREDIST=vcredist_x86.exe
!if "$(ECL_SSE)" != ""
CFLAGS_SSE=/arch:SSE2
!endif
!endif
#
# Configuration-specific (Debug/Release) options
#
!if "$(ECL_DEBUG)" != ""
CFLAGS_OPTIMIZE = /Od
CFLAGS_CONFIG = /Zi /D_DEBUG /MDd $(CFLAGS_OPTIMIZE) $(CFLAGS_SSE)
LDFLAGS_CONFIG = /debug
SHARED_LDFLAGS = /LDd
GCFLAGS =
!else
CFLAGS_OPTIMIZE = /O2
CFLAGS_CONFIG = /DNDEBUG /MD $(CFLAGS_OPTIMIZE) $(CFLAGS_SSE)
LDFLAGS_CONFIG =
SHARED_LDFLAGS = /LD
GCFLAGS = nodebug=1
!endif
!if "$(ECL_USE_DBGHELP)" != ""
CLIBS = $(CLIBS) DbgHelp.lib
!endif
CFLAGS = /EHsc /DGC_DLL /DGC_BUILD /nologo /wd4068 /wd4715 /wd4716 /D_CRT_SECURE_NO_DEPRECATE $(CFLAGS_CONFIG)
LDFLAGS = /link /incremental:no /nologo $(LDFLAGS_CONFIG) /STACK:$(ECL_DEFAULT_C_STACK_SIZE)
# Additional configuration for thread support
#
# ENV_EXPORT is set to "cl_env,DATA" when ECL_THREADS is empty and to
# "ecl_process_env" otherwise.
# NOTE(review): "CFLAGS = $(CFLAGS)" adds nothing to the flags, and DEF is
# assigned again unconditionally ("DEF = ecl.def") in the "Files" section
# further down, which appears to override ecl-threads.def -- confirm intent.
!if "$(ECL_THREADS)" == ""
ENV_EXPORT = cl_env,DATA
!else
ENV_EXPORT = ecl_process_env
CFLAGS = $(CFLAGS)
DEF = ecl-threads.def
!endif
!MESSAGE C++ compiler flags: $(CFLAGS)
!MESSAGE C++ linker flags: $(LDFLAGS)
# ==================== Where To Install Things ====================
# The default location for installation. Everything is placed in
# subdirectories of this directory. The default values for many of
# the variables below are expressed in terms of this one, so you may
# not need to change them. This defaults to /usr/local.
prefix=%CD%\package
exec_prefix=$(prefix)
bindir=$(prefix)
libdir=$(prefix)
includedir=$(prefix)
docdir=$(prefix)\doc
# Programs used by "make install":
#
SHELL = @SHELL@
INSTALL = @INSTALL@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_DATA = @INSTALL_DATA@
mkinstalldirs = $(top_srcdir)/bdwgc/install-sh -d
# Files
#
SUBDIR = c gc gmp
LIBRARIES =
TARGETS = ecl2$(EXE)
DEF = ecl.def
# Set features
#
LSP_FEATURES = :$(ARCHITECTURE) $(LSP_FEATURES)
!if "$(ECL_WIN64)" != ""
LSP_FEATURES = :win64 $(LSP_FEATURES)
!endif
!if "$(ECL_THREADS)" != ""
LSP_FEATURES = :threads :ecl-read-write-lock $(LSP_FEATURES)
!endif
!if "$(ECL_UNICODE)" != ""
LSP_FEATURES = :unicode $(LSP_FEATURES)
!endif
!if "$(ECL_SSE)" != ""
LSP_FEATURES = :sse2 $(LSP_FEATURES)
!endif
!ifdef ECL_SOCKETS
LSP_FEATURES = :wsock $(LSP_FEATURES)
!endif
# Additional modules
#
ECL_MODULES =
COMPILATION_FEATURES = :wants-dlopen
!ifdef ECL_CMP
ECL_MODULES = $(ECL_MODULES) cmp
COMPILATION_FEATURES = :wants-cmp $(COMPILATION_FEATURES)
!endif
!ifdef ECL_ASDF
ECL_MODULES = $(ECL_MODULES) asdf
COMPILATION_FEATURES = :wants-asdf $(COMPILATION_FEATURES)
!endif
!ifdef ECL_SOCKETS
ECL_MODULES = $(ECL_MODULES) sockets
COMPILATION_FEATURES = :wants-sockets $(COMPILATION_FEATURES)
!endif
!ifdef ECL_RT
ECL_MODULES = $(ECL_MODULES) rt
COMPILATION_FEATURES = :wants-rt $(COMPILATION_FEATURES)
!endif
!ifdef ECL_DEFSYS
ECL_MODULES = $(ECL_MODULES) defsystem
COMPILATION_FEATURES = :wants-defsystem $(COMPILATION_FEATURES)
!endif
!ifdef ECL_PROFILE
ECL_MODULES = $(ECL_MODULES) profile
COMPILATION_FEATURES = :wants-profile $(COMPILATION_FEATURES)
!endif
ECL_MODULES = $(ECL_MODULES) bytecmp
COMPILATION_FEATURES = :builtin-bytecmp :wants-bytecmp $(COMPILATION_FEATURES)
ECL_MODULES = $(ECL_MODULES) ecl-curl
COMPILATION_FEATURES = :wants-ecl-curl $(COMPILATION_FEATURES)
ECL_MODULES = $(ECL_MODULES) deflate
COMPILATION_FEATURES = :wants-deflate $(COMPILATION_FEATURES)
!MESSAGE ECL Modules: $(ECL_MODULES)
!MESSAGE ECL Features: $(COMPILATION_FEATURES)
# Build rules
#
all: $(TARGETS) ecl-config.bat ecl-cc.bat
.PHONY: all
%Makefile: $(srcdir)/%Makefile.in config.status
./config.status
c\cut$(EXE): $(top_srcdir)\util\cut.c
cd c
$(MAKE) cut$(EXE)
cd ..
$(TARGETS): $(UCDDAT) ecl_min$(EXE) compile.lsp BUILD-STAMP
set ECLDIR=./
ecl_min compile
BUILD-STAMP: Makefile
date /t > $@
ecl-static.lib: $(TARGETS)
link /lib /out:$@ eclmin.lib c\all_symbols2.obj lsp.lib eclgmp.lib eclgc.lib
ecl_min$(EXE): $(LIBRARIES) eclmin.lib
$(CC) /Fe$@ cinit.obj c\all_symbols.obj eclmin.lib $(LDFLAGS) $(LIBS)
if exist ecl_min$(EXE).manifest \
mt -manifest ecl_min.exe.manifest -outputresource:ecl_min.exe;1
-$(RM) ecl_min.exp ecl_min.lib ecl_min$(EXE).manifest
.gdbinit: $(srcdir)\util\gdbinit
$(CP) $(srcdir)\util\gdbinit $@
lsp/config.lsp: $(srcdir)/lsp/config.lsp.in Makefile c\cut$(EXE)
if not exist lsp $(MKDIR) lsp
c\cut "@ecldir\@" "$(libdir:\=/)" \
"@SHORT_SITE_NAME@" "$(SHORT_SITE_NAME)" \
"@LONG_SITE_NAME@" "$(LONG_SITE_NAME)" \
"@PACKAGE_VERSION@" "$(ECL_VERSION)" \
"@ARCHITECTURE@" "$(ARCHITECTURE)" \
"@SOFTWARE_TYPE@" "$(SOFTWARE_TYPE)" \
"@SOFTWARE_VERSION@" "$(SOFTWARE_VERSION)" \
"@thehost@" "$(THEHOST)" \
"@true_srcdir@" "$(srcdir:\=/)" \
"@top_srcdir@" "$(top_srcdir:\=/)/" \
< $(srcdir)\lsp\config.lsp.in > lsp\config.lsp
compile.lsp: bare.lsp $(srcdir)/compile.lsp.in Makefile
c\cut "@ecldir\@" "$(libdir:\=/)" \
"@libdir\@" "$(libdir:\=/)" \
"@true_srcdir@" "$(srcdir:\=/)" \
"@true_builddir@" "$(MAKEDIR:\=/)" \
"@CFLAGS@" "$(CFLAGS) -DGC_BUILD" \
"@ECL_CFLAGS@" "" \
"@CPPFLAGS@" "" \
"@ECL_LDRPATH@" "" \
"@LDFLAGS@" "$(LDFLAGS)" \
"@SHARED_LDFLAGS@" "$(SHARED_LDFLAGS)" \
"@BUNDLE_LDFLAGS@" "$(SHARED_LDFLAGS)" \
"@PROGRAM_LDFLAGS@" "" \
"@CLIBS@" "$(CLIBS)" \
"@STATICLIBS@" "$(STATICLIBS)" \
"@LIBS@" "$(LIBS)" \
"@CORE_LIBS@" "" \
"@FASL_LIBS@" "" \
"@OBJEXT@" "obj" \
"@SHAREDPREFIX@" "" \
"@SHAREDEXT@" "lib" \
"@LIBPREFIX@" "" \
"@LIBEXT@" "lib" \
"@EXEEXT@" ".exe" \
"@LDINSTALLNAME@" "" \
"@DEF@" "$(DEF)" \
"@RANLIB@" "ranlib" \
"@LSP_FEATURES@" "$(LSP_FEATURES)" \
"@COMPILATION_FEATURES@" "$(COMPILATION_FEATURES)" \
"@ECL_CMPDIR@" "cmp" \
"@ECL_EXTRA_LISP_FILES@" "" \
"@ECL_INIT_FORM@" "(si::top-level t)" \
< $(srcdir)\compile.lsp.in > compile.lsp
bare.lsp: $(srcdir)/bare.lsp.in lsp/load.lsp clos/load.lsp cmp/load.lsp cmp/cmpdefs.lsp
c\cut "@true_srcdir@" "$(srcdir:\=/)" \
"@top_srcdir@" "$(srcdir:\=/)" \
"@true_builddir@" "$(MAKEDIR:\=/)"\
"@ECL_CMPDIR@" "cmp" < $(srcdir)\bare.lsp.in > bare.lsp
lsp/load.lsp: $(srcdir)/lsp/load.lsp.in
$(CP) $(srcdir)\lsp\load.lsp.in lsp\load.lsp
clos/load.lsp: $(srcdir)/clos/load.lsp.in
if not exist clos $(MKDIR) clos
$(CP) $(srcdir)\clos\load.lsp.in clos\load.lsp
cmp/load.lsp: $(srcdir)/cmp/load.lsp.in
if not exist cmp $(MKDIR) cmp
$(CP) $(srcdir)\cmp\load.lsp.in cmp\load.lsp
# Generate cmp\cmpdefs.lsp from the source template by substituting the
# autoconf-style @...@ placeholders with the MSVC toolchain settings defined
# in this Makefile (compiler, flags, library lists, file extensions).
# @PACKAGE_VERSION@ is filled from ECL_VERSION, the version macro this
# Makefile defines and that the lsp/config.lsp rule also uses; the previously
# referenced PACKAGE_VERSION macro is not defined anywhere in this file.
cmp/cmpdefs.lsp: $(srcdir)/cmp/cmpdefs.lsp Makefile
	c\cut "@ECL_CC@" "$(CC)" \
		"@CFLAGS@" "$(CFLAGS)" \
		"@CFLAGS_OPTIMIZE@" "$(CFLAGS_OPTIMIZE)" \
		"@ECL_CFLAGS@" "" \
		"@CPPFLAGS@" "" \
		"@ECL_LDRPATH@" "" \
		"@LDFLAGS@" "$(LDFLAGS)" \
		"@SHARED_LDFLAGS@" "$(SHARED_LDFLAGS)" \
		"@BUNDLE_LDFLAGS@" "$(SHARED_LDFLAGS)" \
		"@PROGRAM_LDFLAGS@" "" \
		"@CLIBS@" "$(CLIBS)" \
		"@STATICLIBS@" "$(STATICLIBS)" \
		"@OBJEXT@" "obj" \
		"@SHAREDPREFIX@" "" \
		"@SHAREDEXT@" "dll" \
		"@LIBPREFIX@" "" \
		"@LIBEXT@" "lib" \
		"@EXEEXT@" ".exe" \
		"@ecldir\@" "NIL" \
		"@libdir\@" "NIL" \
		"@includedir\@" "NIL" \
		"@ARCHITECTURE@" "$(ARCHITECTURE)" \
		"@SOFTWARE_TYPE@" "$(SOFTWARE_TYPE)" \
		"@PACKAGE_VERSION@" "$(ECL_VERSION)" \
		< $(srcdir)\cmp\cmpdefs.lsp > cmp\cmpdefs.lsp
ecl-config.bat: util\ecl-config.bat Makefile
c\cut "~A" "$(libdir:\=/)"\
"~*" "" \
"@ECL_CFLAGS@" "$(CFLAGS)" \
"@LDFLAGS@" "$(LDFLAGS)" \
"@CLIBS@" "" \
"@libdir@" "$(prefix:\=/)" \
"@includedir@" "$(prefix:\=/)/ecl" \
< util\ecl-config.bat > ecl-config.bat
ecl-cc.bat: util\ecl-cc.bat Makefile
c\cut "@ECL_CFLAGS@" "$(CFLAGS)" \
"@LDFLAGS@" "$(LDFLAGS)" \
"@CLIBS@" "" \
"@libdir@" "$(prefix:\=/)" \
"@includedir@" "$(prefix:\=/)/ecl" \
< util\ecl-cc.bat > ecl-cc.bat
c/ecl_features.h: $(srcdir)/c/ecl_features.h.in Makefile
c\cut "@LSP_FEATURES@" "$(LSP_FEATURES)" \
< $(srcdir)\c\ecl_features.h.in > c\ecl_features.h
eclmin.lib: eclgmp.lib eclgc.lib lsp/config.lsp c/ecl_features.h
cd c
$(MAKE) /nologo ECL_VERSION_NUMBER=$(ECL_VERSION_NUMBER) \
ECL_DEFAULT_C_STACK_SIZE=$(ECL_DEFAULT_C_STACK_SIZE) \
ECL_THREADS=$(ECL_THREADS) ECL_UNICODE=$(ECL_UNICODE) \
ECL_SSE=$(ECL_SSE) ECL_WIN64=$(ECL_WIN64) \
ECL_USE_DBGHELP=$(ECL_USE_DBGHELP) \
ECL_ARCHITECTURE=$(ARCHITECTURE) \
"ECL_CFLAGS=$(CFLAGS) -DGC_BUILD"
cd ..
# Build gc.lib in the gc subdirectory, copy it to eclgc.lib in this directory,
# then copy the listed GC headers from the bdwgc sources into ecl\gc (and
# ecl\gc\private). Each header is listed once; gc_pthread_redirects.h used to
# appear twice, which only repeated the same copy.
eclgc.lib:
	cd gc
	$(MAKE) /nologo $(GCFLAGS) ECL_THREADS=$(ECL_THREADS) "CFLAGS_CONFIG=$(CFLAGS_CONFIG)" "MY_CPU=$(GC_CPU)" gc.lib
	$(CP) gc.lib ..\eclgc.lib
	cd ..
	if not exist ecl\gc $(MKDIR) ecl\gc
	if not exist ecl\gc\private $(MKDIR) ecl\gc\private
	for %h in (gc.h gc_version.h gc_local_alloc.h gc_pthread_redirects.h \
		gc_config_macros.h leak_detector.h gc_typed.h \
		private\gc_priv.h private\gcconfig.h gc_mark.h \
		new_gc_alloc.h weakpointer.h) \
		do $(CP) $(srcdir)\bdwgc\include\%h ecl\gc\%h
eclgmp.lib:
cd gmp
$(MAKE) /nologo "MPN_TYPE=$(GMP_TYPE)" "CFLAGS_CONFIG=$(CFLAGS_CONFIG)" \
"BITS=$(GMP_BITS)" "YASM=$(YASM)"
$(CP) gmp.lib ..\eclgmp.lib
$(CP) gmp.h ..\ecl\gmp.h
cd ..
# Install the build products under $(prefix): the executable, ecl.dll and the
# helper .bat scripts into $(bindir); headers into $(includedir)\ecl; the
# libraries, BUILD-STAMP, help.doc and the files listed in MODULES into
# $(libdir); license/readme files into $(docdir); and, when ECL_UNICODE is
# set, the encodings directory.
# Destination paths are quoted throughout because the default prefix is built
# from %CD% and may contain spaces.
# The stale atomic_ops check uses $(includedir); it previously referenced
# $(include), a macro this Makefile never defines.
# NOTE(review): that check removes $(includedir)\ecl\atomic_ops while the
# xcopy after it writes to $(includedir)\atomic_ops -- confirm which location
# is intended.
install:
	IF NOT EXIST "$(prefix)" $(MKDIR) "$(prefix)"
	IF NOT EXIST "$(bindir)" $(MKDIR) "$(bindir)"
	for %i in ($(TARGETS) ecl.dll) do $(CP) %i "$(bindir)\%i"
	IF EXIST "$(bindir)\ecl2$(EXE)" $(MV) "$(bindir)\ecl2$(EXE)" "$(bindir)\ecl$(EXE)"
	IF EXIST ecl2$(EXE).manifest $(CP) ecl2$(EXE).manifest "$(bindir)\ecl$(EXE).manifest"
	IF EXIST ecl.dll.manifest $(CP) ecl.dll.manifest "$(bindir)"
	$(CP) ecl-config.bat "$(bindir)\ecl-config.bat"
	$(CP) ecl-cc.bat "$(bindir)\ecl-cc.bat"
	IF NOT EXIST "$(includedir)\ecl" $(MKDIR) "$(includedir)\ecl"
	IF NOT EXIST "$(includedir)\ecl\impl" $(MKDIR) "$(includedir)\ecl\impl"
	IF NOT EXIST "$(includedir)\ecl\gc" $(MKDIR) "$(includedir)\ecl\gc"
	IF NOT EXIST "$(includedir)\ecl\gc\private" $(MKDIR) "$(includedir)\ecl\gc\private"
	for %i in (ecl\*.h ecl\impl\*.h ecl\gc\*.h ecl\gc\private\*.h) do $(CP) %i "$(includedir)\%i"
	IF EXIST "$(includedir)\ecl\atomic_ops" rmdir /S /Q "$(includedir)\ecl\atomic_ops"
	IF EXIST "ecl\atomic_ops" xcopy /S /Y "ecl\atomic_ops" "$(includedir)\atomic_ops\"
	cd c
	$(MAKE) /nologo cut.exe
	cd ..
	c\cut.exe < ecl\config.h > "$(includedir)\ecl\config.h"
	IF NOT EXIST "$(libdir)" $(MKDIR) "$(libdir)"
	IF NOT EXIST "$(libdir)\ecl" $(MKDIR) "$(libdir)\ecl"
	for %i in ($(LIBRARIES) c\dpp.exe BUILD-STAMP help.doc ecl.lib) do $(CP) %i "$(libdir)"
	for /f %i in ('type MODULES') do $(CP) %i "$(libdir)"
	for %i in (ecl-static.lib) do IF EXIST %i $(CP) %i "$(libdir)"
	IF NOT EXIST "$(docdir)" $(MKDIR) "$(docdir)"
	for %i in (..\LICENSE ..\LGPL ..\README.md ..\CHANGELOG) do $(CP) %i "$(docdir)"
!if "$(ECL_UNICODE)" != ""
	IF NOT EXIST "$(libdir)\encodings" $(MKDIR) "$(libdir)\encodings"
	$(CP) encodings\*.* "$(libdir)\encodings"
!endif
check: package\ecl.exe
cd tests
$(MAKE) /nologo
recheck: package\ecl.exe
cd tests
$(MAKE) /nologo clean
$(MAKE) /nologo
package\ecl.exe: ecl2$(EXE)
$(MAKE) /nologo install ECL_THREADS=$(ECL_THREADS) ECL_UNICODE=$(ECL_UNICODE)
windows-nsi:
IF EXIST "$(TAR_DIR)" $(RMDIR) "$(TAR_DIR)"
$(MKDIR) "$(TAR_DIR)"
$(MAKE) prefix="$(TAR_DIR)" install ECL_UNICODE=$(ECL_UNICODE)
IF EXIST "$(VCREDIST)" $(CP) "$(VCREDIST)" "$(TAR_DIR)"
util\ecl_nsi.bat %%CD%%\"$(srcdir)"\util\ecl.nsi "$(TAR_DIR)" $(ECL_VERSION)
"$(MKNSI)" "$(TAR_DIR)/ecl.nsi"
$(MV) $(TAR_DIR)\Setup.exe ecl-$(ECL_VERSION).exe
# $(RMDIR) $(TAR_DIR)
clean: clean_ecl clean_lisp
-$(RM) .gdbinit cinit.lib ecl_min.lib
-$(RM) bdwgc\*.pdb
cd gc
-$(MAKE) /nologo $(GCFLAGS) ECL_THREADS=$(ECL_THREADS) "CFLAGS_CONFIG=$(CFLAGS_CONFIG)" "MY_CPU=$(GC_CPU)" clean
cd ..
-for %h in (gc.h gc_local_alloc.h gc_pthread_redirects.h \
gc_config_macros.h leak_detector.h gc_typed.h \
gc_mark.h private\gc_priv.h private\gcconfig.h) \
do $(RM) ecl\gc\%h
-$(RMDIR) ecl\gc\private
-$(RMDIR) ecl\gc
cd gmp
-$(MAKE) /nologo "MPN_TYPE=$(GMP_TYPE)" "CFLAGS_CONFIG=$(CFLAGS_CONFIG)" \
"BITS=$(GMP_BITS)" "YASM=$(YASM)" clean
cd ..
-$(RM) ecl\gmp.h
clean_ecl:
-for %i in (eclgc.lib eclgmp.lib lsp\config.lsp compile.lsp bare.lsp \
lsp\load.lsp clos\load.lsp cmp\load.lsp cmp\cmpdefs.lsp \
ecl.lib ecl.dll ecl_min$(EXE) eclmin.lib help.doc \
BUILD-STAMP $(TARGETS) *.exp *.ilk *.manifest *.pdb *.c *.obj \
ecl-config.bat ecl-static.lib *.tmp *.implib *.lib ecl.ico \
ecl-cc.bat ecl.rc ecl.res) \
do $(RM) %i
cd c
-$(MAKE) /nologo ECL_VERSION_NUMBER=$(ECL_VERSION_NUMBER) \
ECL_THREADS=$(ECL_THREADS) ECL_UNICODE=$(ECL_UNICODE) \
ECL_SSE=$(ECL_SSE) ECL_WIN64=$(ECL_WIN64) \
ECL_USE_DBGHELP=$(ECL_USE_DBGHELP) \
"ECL_CFLAGS=$(CFLAGS) -DGC_BUILD" clean
cd ..
clean_lisp:
-for %i in (lsp clos $(ECL_MODULES)) do for %k in (%i.lib %i.fas %i.ilk %i.c %i.obj %i.pdb) do $(RM) %k
-for %i in (lsp clos ext $(ECL_MODULES)) do $(RMDIR) %i
-for %i in ( *.fas *.implib *.lib *.asd) do $(RM) %i
-$(RM) help.doc
-$(RM) MODULES
distclean: clean
realclean: distclean
# Legacy test targets. Their recipes use POSIX shell syntax ("cd c; $(MAKE)",
# "for i in ...; do ... done", "$(MAKE) -C", cp/mv/diff/less) instead of the
# cmd.exe commands used by the other rules in this Makefile.
# NOTE(review): presumably carried over from the Unix build and not runnable
# under nmake/cmd.exe -- confirm before relying on them.
test1:
cd c; $(MAKE)
$(MAKE) ecl_min
$(MAKE) ecl
cd tests; $(MAKE)
diff tests tests2
test2:
$(MAKE) clean_lisp
cd c; $(MAKE)
$(MAKE) ecl_min
$(RM) ecl
$(MAKE) ecl
for i in lsp clos cmp; do diff --exclude=\*.o $$i old/$$i; done
test3:
-mkdir stage2
cp -rf lsp clos cmp stage2
-for i in lsp cmp clos tk; do test -f lib$$i.a && mv lib$$i.a stage2; done
$(MAKE) clean_lisp
./ecl < compile.lsp
-for i in lsp clos cmp tk; do test -d $$i && diff --exclude=\*.o $$i stage2/$$i; done | less
test:
$(MAKE) -C tests
$(MAKE) -C ansi-tests > ansi-tests/log
# -(diff tests ~/src/tests; diff --exclude log ansi-tests ~/src/ansi-tests) | less

View file

@ -1,223 +0,0 @@
#
# Makefile for ECL core library
#
top_srcdir = ..\..\src
srcdir = ..\..\src\c
!if "$(ECL_WIN64)" != ""
ECL_FPE_CODE=fpe_none.c
!else
ECL_FPE_CODE=fpe_x86.c
!endif
!if "$(ECL_THREADS)" != ""
ECL_THREADS_FLAG=1
THREADS_OBJ= thread.obj mutex.obj condition_variable.obj rwlock.obj \
semaphore.obj barrier.obj mailbox.obj
!else
ECL_THREADS_FLAG=0
THREADS_OBJ=
!endif
!if "$(ECL_UNICODE)" != ""
ECL_UNICODE_FLAG=21
ECL_UCD_OBJ = ucd.obj ucd-0000.obj ucd-0016.obj \
ucd-0032.obj ucd-0048.obj ucd-0064.obj \
ucd-0080.obj ucd-0096.obj ucd-0112.obj \
ucd-0128.obj ucd-0144.obj ucd_names_char.obj ucd_names_codes.obj ucd_names_pair.obj ucd_names_str.obj
!else
ECL_UNICODE_FLAG=0
!endif
!if "$(ECL_SSE)" != ""
ECL_SSE_FLAG=1
ECL_SSE_OBJ=sse2.obj
!else
ECL_SSE_FLAG=0
ECL_SSE_OBJ=
!endif
!if "$(ECL_USE_DBGHELP)" != ""
ECL_USE_DBGHELP_FLAG=1
!else
ECL_USE_DBGHELP_FLAG=0
!endif
# Programs used by "make":
#
CC = cl
CFLAGS = -c $(ECL_CFLAGS) -DECL_BUILD -DECL_API="__declspec(dllexport)" -I./ -I../ -I../ecl -I$(srcdir) -I$(srcdir)/unicode
SHELL = /bin/sh
RM = del
CP = copy /Y
MV = move /Y
LINK = link
EXE = .exe
DPP = .\dpp$(EXE)
# Data for installation
#
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
prefix=@prefix@
exec_prefix=$(prefix)
libdir=$(prefix)\lib\ecl
# Files
HDIR = $(top_srcdir)\h
HFILES = ..\ecl\config.h ..\ecl\config-internal.h ..\ecl\atomic_ops.h \
$(HDIR)\ecl.h $(HDIR)\ecl-cmp.h \
$(HDIR)\object.h $(HDIR)\cs.h $(HDIR)\stacks.h \
$(HDIR)\external.h $(HDIR)\cons.h $(HDIR)\legacy.h \
$(HDIR)\number.h $(HDIR)\page.h \
$(HDIR)\internal.h $(HDIR)\ecl-inl.h $(HDIR)\bytecodes.h \
$(HDIR)\impl\math_dispatch.h $(HDIR)\impl\math_dispatch2.h \
$(HDIR)\impl\math_fenv.h $(HDIR)\impl\math_fenv_msvc.h \
$(HDIR)\cache.h $(HDIR)\stack-resize.h \
$(HDIR)\ecl_atomics.h
CLOS_OBJS = cache.obj accessor.obj instance.obj gfun.obj
NUM_OBJS = number.obj num_pred.obj num_arith.obj num_co.obj num_log.obj num_rand.obj \
cos.obj sin.obj tan.obj atan.obj \
cosh.obj sinh.obj tanh.obj exp.obj \
expt.obj log.obj sqrt.obj abs.obj \
zerop.obj plusp.obj minusp.obj negate.obj \
conjugate.obj one_plus.obj one_minus.obj \
plus.obj minus.obj times.obj divide.obj \
number_compare.obj number_equalp.obj minmax.obj \
floor.obj ceiling.obj round.obj truncate.obj
WRITER_OBJS = print.obj float_to_digits.obj float_to_string.obj \
integer_to_string.obj write_ugly.obj \
write_object.obj write_symbol.obj write_array.obj \
write_list.obj write_code.obj write_sse.obj \
print_unreadable.obj
READER_OBJS = read.obj parse_integer.obj parse_number.obj
STREAM_OBJS = stream.obj file.obj strm_os.obj strm_clos.obj \
strm_string.obj strm_composite.obj strm_common.obj \
strm_sequence.obj strm_eformat.obj strm_binary.obj
FFI_OBJS = ffi.obj libraries.obj backtrace.obj mmap.obj cdata.obj
OBJS = main.obj symbol.obj package.obj cons.obj list.obj apply.obj eval.obj \
interpreter.obj compiler.obj disassembler.obj reference.obj character.obj \
error.obj string.obj cfun.obj typespec.obj assignment.obj memory.obj \
predicate.obj array.obj vector_push.obj sequence.obj cmpaux.obj macros.obj \
backq.obj stacks.obj time.obj unixint.obj mapfun.obj multival.obj hash.obj \
format.obj pathname.obj structure.obj load.obj unixfsys.obj unixsys.obj \
serialize.obj atomic.obj process.obj \
big.obj alloc_2.obj tcp.obj \
$(BOOT_OBJS) $(NUM_OBJS) $(WRITER_OBJS) $(READER_OBJS) $(STREAM_OBJS) \
$(CLOS_OBJS) $(FFI_OBJS) $(THREADS_OBJ) $(ECL_UCD_OBJ) $(ECL_SSE_OBJ)
all: $(DPP) ..\eclmin.lib ..\cinit.obj
.SUFFIXES: .obj .c .d
{$(srcdir:\=/)}.d{}.c:
$(DPP) $< $@
{$(srcdir:\=/)/arch}.d{}.c:
$(DPP) $< $@
{$(srcdir:\=/)/clos/}.d{}.c:
$(DPP) $< $@
{$(srcdir:\=/)/reader}.d{}.c:
$(DPP) $< $@
{$(srcdir:\=/)/streams}.d{}.c:
$(DPP) $< $@
{$(srcdir:\=/)/printer}.d{}.c:
$(DPP) $< $@
{$(srcdir:\=/)/ffi}.d{}.c:
$(DPP) $< $@
{$(srcdir:\=/)/unicode}.c{}.c:
$(DPP) $< $@
{$(srcdir:\=/)/numbers}.d{}.c:
$(DPP) $< $@
{$(srcdir:\=/)/threads}.d{}.c:
$(DPP) $< $@
#.d.c:
# $(DPP) $< $@
.c.obj:
$(CC) $(CFLAGS) /Fo$@ $<
.PHONY: all
cut.exe: $(top_srcdir)/util/cut.c
$(CC) $(LDFLAGS) /Fecut.exe $(top_srcdir)/util/cut.c
..\eclmin.lib: $(OBJS:.obj=.c) all_symbols.c $(OBJS) all_symbols.obj all_symbols2.obj
-$(RM) $@
$(LINK) -lib -nologo -out:$@ $(OBJS)
clean:
-for %f in ($(OBJS:.obj=.c) all_symbols.c) do $(RM) %f
-for %f in ($(OBJS) all_symbols.obj all_symbols2.obj) do $(RM) %f
-$(RM) *.pdb
-for %f in (..\ecl\config.h dpp dpp.obj $(DPP) cut.exe cut.obj \
..\eclmin.lib ..\cinit.obj cinit.c) \
do $(RM) %f
-for %f in (..\ecl\*.h) do $(RM) %f
-for %f in (cut.exe cut.obj dpp.exe dpp.obj) do $(RM) %f
-del /S /Q ..\ecl\atomic_ops
-del /S /Q ..\ecl\impl
# Build rules
$(DPP): $(srcdir)/dpp.c $(srcdir)/symbols_list.h ../ecl/config.h ../ecl/config-internal.h
$(CC) /nologo -I.. -I./ $(srcdir)/dpp.c /Fe$@
$(HFILES): ../ecl/config.h.msvc6 ../ecl/config-internal.h.msvc6 Makefile
-mkdir ..\ecl\impl
cut.exe "@ECL_FPE_CODE@" "$(srcdir:\=/)/arch/$(ECL_FPE_CODE)" \
"@ECL_VERSION_NUMBER@" "$(ECL_VERSION_NUMBER)" \
"@ECL_DEFAULT_C_STACK_SIZE@" "$(ECL_DEFAULT_C_STACK_SIZE)" \
"@ECL_THREADS@" "$(ECL_THREADS_FLAG)" \
"@ECL_UNICODE@" "$(ECL_UNICODE_FLAG)" \
"@ECL_SSE2@" "$(ECL_SSE_FLAG)" \
"@ECL_USE_DBGHELP@" "$(ECL_USE_DBGHELP_FLAG)" \
< ..\ecl\config.h.msvc6 > ..\ecl\config.h
cut.exe "@ECL_FPE_CODE@" "$(srcdir:\=/)/arch/$(ECL_FPE_CODE)" \
"@ECL_VERSION_NUMBER@" "$(ECL_VERSION_NUMBER)" \
"@ECL_DEFAULT_C_STACK_SIZE@" "$(ECL_DEFAULT_C_STACK_SIZE)" \
"@ECL_THREADS@" "$(ECL_THREADS_FLAG)" \
"@ECL_UNICODE@" "$(ECL_UNICODE_FLAG)" \
"@ECL_SSE2@" "$(ECL_SSE_FLAG)" \
"@ECL_USE_DBGHELP@" "$(ECL_USE_DBGHELP_FLAG)" \
"@ARCHITECTURE@" "$(ECL_ARCHITECTURE)" \
< ..\ecl\config-internal.h.msvc6 > ..\ecl\config-internal.h
xcopy /SYI $(top_srcdir)\h\*.h ..\ecl
-mkdir ..\ecl\atomic_ops
-mkdir ..\ecl\atomic_ops\sysdeps
xcopy /SYI $(top_srcdir)\bdwgc\libatomic_ops\src\atomic_ops.h ..\ecl
xcopy /SYI $(top_srcdir)\bdwgc\libatomic_ops\src\atomic_ops\generalize*.h ..\ecl\atomic_ops
xcopy /SYI $(top_srcdir)\bdwgc\libatomic_ops\src\atomic_ops\sysdeps\* ..\ecl\atomic_ops\sysdeps
xcopy /SYI $(top_srcdir)\bdwgc\libatomic_ops\src\atomic_ops\ao_version.h ..\ecl\atomic_ops
# NOTE(review): the two rules below name .o targets, while OBJS and the
# .c.obj inference rule in this Makefile use the .obj extension, and gbc does
# not appear in OBJS. They look like leftovers from the Unix Makefile --
# confirm before relying on them.
#
# GCC might break this code
#
gbc.o: gbc.c $(HFILES)
$(CC) $(CFLAGS) -O0 gbc.c /Fo$@
#
# This reduces the overhead of jumping to other functions
#
apply.o: apply.c $(HFILES) $(HDIR)/cs.h
$(CC) $(CFLAGS) apply.c /Fo$@
#
# These files are interrelated
#
all_symbols.obj: all_symbols.c
$(CC) $(CFLAGS) -Fo$@ -I../ -I./ all_symbols.c
all_symbols2.obj: all_symbols.c
$(CC) $(CFLAGS) -DECL_FINAL -Fo$@ -I../ -I./ all_symbols.c
#
# This is in another directory
#
../cinit.obj: cinit.c
$(CC) $(CFLAGS) -I../ -I./ /Focinit.obj cinit.c
$(MV) cinit.obj ..\

View file

View file

@ -1,143 +0,0 @@
top_srcdir= ..\..\src
srcdir = ..\..\src\doc
prefix=@prefix@
exec_prefix=$(prefix)\bin
infodir = $(prefix)\doc\info
mandir=$(prefix)\doc\man
docdir=$(prefix)\doc
manext=1
INFOEXT = info
SHELL = cmd.exe
INSTALL = ..\install.bat
INSTALL_PROGRAM = $(INSTALL)
INSTALL_DATA = $(INSTALL)
INSTALL_INFO = $(INSTALL)
mkinstalldirs = $(SHELL) $(top_srcdir)/gc/mkinstalldirs.bat
INFO_FILES = ecl.$(INFOEXT) ecldev.$(INFOEXT)
HTML_FILES = index.html license.html lgpl.html news.html benchmark.html \
install.html download.html
RM = erase
RMDIR = rmdir /S /Q
MKDIR = mkdir
CP = copy /Y
ECL_VERSION=0.9f
FILTER = ..\c\cut$(EXE) "@PACKAGE_VERSION@" "$(ECL_VERSION)"
ECL = ../ecl
# Default target: info files, HTML pages and both HTML manuals. The manual
# prerequisites are the stamp targets developers_manual and user_manual that
# are defined later in this Makefile ("user_manual_manual" had no rule).
all: $(INFO_FILES) $(HTML_FILES) developers_manual user_manual
ecl.dvi: $(srcdir)/user.txi $(srcdir)/macros.txi clisp.sty ecl.sty
tex $(srcdir)/user.txi
ecldev.dvi: $(srcdir)/devel.txi $(srcdir)/macros.txi clisp.sty ecl.sty
tex $(srcdir)/devel.txi
ecl.ps: ecl.dvi $(srcdir)/macros.txi
dvips -o $@ ecl.dvi
ecldev.ps: ecldev.dvi $(srcdir)/macros.txi
dvips -o $@ ecldev.dvi
install: all
IF NOT EXIST $(docdir) $(MKDIR) $(docdir)
for %i in (LICENSE LGPL) do $(CP) $(top_srcdir)\..\%i $(docdir)
for %i in ($(HTML_FILES)) do $(CP) %i $(docdir)
IF NOT EXIST $(docdir)\ecldev $(MKDIR) $(docdir)\ecldev
for %i in (ecldev\*) do $(CP) %i $(docdir)\ecldev
IF NOT EXIST $(docdir)\ecl $(MKDIR) $(docdir)\ecl
for %i in (ecl\*) do $(CP) %i $(docdir)\ecl
flatinstall: all
IF NOT EXIST $(docdir) $(MKDIR) $(docdir)
for %i in (LICENSE LGPL) do $(CP) $(top_srcdir)\..\%i $(docdir)
for %i in ($(HTML_FILES)) do $(CP) %i $(docdir)
IF NOT EXIST $(docdir)\ecldev $(MKDIR) $(docdir)\ecldev
for %i in (ecldev\*) do $(CP) %i $(docdir)\ecldev
IF NOT EXIST $(docdir)\ecl $(MKDIR) $(docdir)\ecl
for %i in (ecl\*) do $(CP) %i $(docdir)\ecl
uninstall:
for k in $(INFO_FILES); do \
rm $$k; \
if [ -x $(INSTALL_INFO) ]; then \
$(INSTALL_INFO) --delete $$k; \
fi; \
done
rm -r $(infodir)/ecl.$(INFOEXT) $(infodir)/ecldev.$(INFOEXT); \
rm $(mandir)/man$(manext)/ecl.$(manext)
# Produce head2 from $(srcdir)\head. When ecl\index.html exists, the links
# ecl/user.html and ecldev/devel.html are rewritten to the corresponding
# index.html files; otherwise the head file is copied unchanged.
# The stray prerequisite "manual" (no rule defines it) has been dropped, and
# the file arguments use backslashes so that cmd's copy does not read "/head"
# as a switch.
head2: developers_manual user_manual $(srcdir)/head Makefile
	IF EXIST ecl\index.html ( \
		..\c\cut.exe "ecl/user.html" "ecl/index.html" \
			"ecldev/devel.html" "ecldev/index.html" \
			< $(srcdir)\head > head2 \
	) ELSE ( \
		copy /y $(srcdir)\head head2 \
	)
ecl.info.gz: ecl.info
gzip < ecl.info > ecl.info.gz
ecldev.info.gz: ecldev.info
gzip < ecldev.info > ecldev.info.gz
ecl.info: $(srcdir)/user.txi $(srcdir)/macros.txi
makeinfo -I $(srcdir) --no-split $(srcdir)/user.txi
ecldev.info: $(srcdir)/devel.txi $(srcdir)/macros.txi
makeinfo -I $(srcdir) --no-split $(srcdir)/devel.txi
download.html: $(srcdir)/download.in.html head2
( type head2 $(srcdir)\download.in.html $(srcdir)\end ) | $(FILTER) > $@
index.html: $(srcdir)/index.in.html head2
( type head2 $(srcdir)\index.in.html $(srcdir)\end ) | $(FILTER) > $@
install.html: $(srcdir)/install.in.html head2
( type head2 $(srcdir)\install.in.html $(srcdir)\end ) | $(FILTER) > $@
news.html: $(srcdir)\..\..\ANNOUNCEMENT head2
type head2 > html_tmp
echo "<pre>" >> html_tmp
type $(srcdir)\..\..\ANNOUNCEMENT >> html_tmp
echo "</pre>" >> html_tmp
type $(srcdir)\end >> html_tmp
$(FILTER) < html_tmp > $@
$(RM) html_tmp
benchmark.html: $(srcdir)/benchmark.in.html ../gabriel/BENCHMARK head2
type head2 > html_tmp
type $(srcdir)\benchmark.in.html >> html_tmp
echo "<pre>" >> html_tmp
type ..\gabriel\BENCHMARK >> html_tmp
echo "</pre>" >> html_tmp
type $(srcdir)\end >> html_tmp
$(FILTER) < html_tmp > $@
$(RM) html_tmp
../gabriel/BENCHMARK:
if not exist ..\gabriel $(MKDIR) ..\gabriel
echo No benchmarks available > ..\gabriel\BENCHMARK
license.html: $(top_srcdir)/../LICENSE head2
type head2 > html_tmp
echo "<pre>" >> html_tmp
type $(top_srcdir)\..\LICENSE >> html_tmp
echo "</pre>" >> html_tmp
type $(srcdir)\end >> html_tmp
$(FILTER) < html_tmp > $@
$(RM) html_tmp
lgpl.html: $(top_srcdir)/../LGPL head2
type head2 > html_tmp
echo "<pre>" >> html_tmp
type $(top_srcdir)\..\LGPL >> html_tmp
echo "</pre>" >> html_tmp
type $(srcdir)\end >> html_tmp
$(FILTER) < html_tmp > $@
$(RM) html_tmp
user_manual: $(srcdir)/user.txi $(srcdir)/macros.txi
echo "Producing ecl.html; ignore error messages."
IF NOT EXIST ecl MKDIR ecl
makeinfo -v -I $(srcdir) --html $(srcdir)/user.txi
echo > user_manual
developers_manual: $(srcdir)/devel.txi $(srcdir)/macros.txi
echo "Producing ecldev.html; ignore error messages."
IF NOT EXIST ecldev MKDIR ecldev
makeinfo -v -I $(srcdir) --html $(srcdir)/devel.txi
echo > developers_manual
# Remove the generated manual directories and the generated files, including
# the user_manual and developers_manual stamp files written by their rules
# (the list previously named "developers_manual_manual", which no rule
# creates, so the real stamp was left behind).
# The leading "-" keeps clean going when an item does not exist.
clean:
	-for %i in (ecl ecldev ..\gabriel) do $(RMDIR) %i
	-for %i in (ecl.info* ecldev.info* $(HTML_FILES) head2 user_manual developers_manual ..\gabriel\BENCHMARK) do $(RM) %i

View file

@ -1,204 +0,0 @@
/*
* FEATURES LINKED IN:
*/
/* Locatives */
/* #undef LOCATIVE */
/* Use old MIT LOOP macro system */
/* #undef ECL_OLD_LOOP */
/* Define this if you want a runtime version only without compiler */
/* #undef RUNTIME */
/* Profile tool */
/* #undef PROFILE */
/* Program Development Environment */
/* #undef PDE */
/* Allow loading dynamically linked code */
#define ENABLE_DLOPEN 1
/* Undefine this if you do not want ECL to check for circular lists */
#define ECL_SAFE
/* Use CMU Common-Lisp's FORMAT routine */
#define ECL_CMU_FORMAT 1
/* Assembler implementation of APPLY and friends */
/* #undef ECL_ASM_APPLY */
/* Activate Boehm-Weiser incremental garbage collector */
/* #undef GBC_BOEHM_GENGC */
#define ECL_WEAK_HASH
/*
* SYSTEM FEATURES:
*/
/* Arguments cannot be accessed as array */
/* #undef NO_ARGS_ARRAY */
/* Most significant byte first */
/* #undef WORDS_BIGENDIAN */
/* Has <sys/resource.h> */
/* #undef HAVE_SYS_RESOURCE_H */
/* #undef HAVE_ULIMIT_H */
/* High precision timer */
/* #undef HAVE_NANOSLEEP */
/* Float version of isnan() */
/* #undef HAVE_ISNANF */
/* float.h for epsilons, maximum real numbers, etc */
#define HAVE_FLOAT_H 1
/* select() */
/* #undef HAVE_SELECT */
/* #undef HAVE_SYS_SELECT_H */
/* #undef HAVE_SYS_IOCTL_H */
/* putenv() or setenv() */
#undef HAVE_SETENV
#define HAVE_PUTENV 1
/* times() and sys/times.h */
/* #undef HAVE_TIMES */
/* gettimeofday() and sys/time.h */
/* #undef HAVE_GETTIMEOFDAY */
/* getrusage() and sys/resource.h */
/* #undef HAVE_GETRUSAGE */
/* user home directory, user name, etc... */
/* #undef HAVE_PW_H */
/* symbolic links and checking their existence */
/* #undef HAVE_LSTAT */
/* safe creation of temporary files */
/* #undef HAVE_MKSTEMP */
/* timer for userland threads */
/* #undef HAVE_ALARM */
/* filesystem */
/* #undef HAVE_DIRENT_H */
/* dynamic linking of libraries */
/* #undef HAVE_DLFCN_H */
/* #undef HAVE_LINK_H */
/* #undef HAVE_MACH_O_DYLD_H */
/* POSIX signals */
/* #undef HAVE_SIGPROCMASK */
/* isatty() checks whether a file is connected to a terminal */
#define HAVE_ISATTY 1
/* do we want to have signed zeros */
#define ECL_SIGNED_ZERO 1
/* has support for large files */
/* #undef HAVE_FSEEKO */
/* compiler understands long long */
#define HAVE_LONG_LONG 1
/* the tzset() function gets the current time zone */
#define HAVE_TZSET 1
/* several floating point functions (ISO C99) */
#if 0
#undef HAVE_EXPF
#undef HAVE_LOGF
#undef HAVE_SQRTF
#undef HAVE_COSF
#undef HAVE_SINF
#undef HAVE_TANF
#undef HAVE_SINHF
#undef HAVE_COSHF
#undef HAVE_TANHF
#endif
#define HAVE_FLOORF
#define HAVE_CEILF
#define HAVE_FABSF
#define HAVE_FREXPF
#define HAVE_LDEXPF
#define HAVE_LOG1PL
/* whether we have sched_yield() that gives priority to other threads */
/* #undef HAVE_SCHED_YIELD */
/* uname() for system identification */
/* #undef HAVE_UNAME */
/* #undef HAVE_UNISTD_H */
/* #undef HAVE_SYS_WAIT_H */
/* size of long long */
#define ECL_LONG_LONG_BITS 64
#define HAVE_SYSTEM
/* what characters are used to mark beginning of new line */
#define ECL_NEWLINE_IS_CRLF 1
/* #undef ECL_NEWLINE_IS_LFCR */
/*
* PARAMETERS:
*/
/*
* Memory limits for the old garbage collector.
*/
#define LISP_PAGESIZE 2048 /* Page size in bytes */
#define MAXPAGE 65536 /* Maximum Memory Size */
/* We allocate a number of strings in a pool which is used to speed up reading */
#define ECL_MAX_STRING_POOL_SIZE 10
#define ECL_BUFFER_STRING_SIZE 128
/*
* Macros that depend on these system features.
*/
/* Round n up to the next multiple of 8 on sparc, i386 and mips, and to the
 * next multiple of 4 everywhere else. */
#if !(defined(sparc) || defined(i386) || defined(mips))
# define stack_align(n) (((n) + 0x3) & ~0x3)
#else
# define stack_align(n) (((n) + 0x7) & ~0x7)
#endif
/*
 * FILE_CNT(fp) reads a counter directly out of the stdio FILE structure;
 * each alternative below names a different structure member.
 * As written, the first two conditions compare unequal constants and can
 * never be true, so only the _MSC_VER < 1900 variant can be selected; in
 * every other case FILE_CNT stays undefined after the #undef.
 * NOTE(review): the literal on the left of each "==" looks like a value
 * substituted by the build configuration -- confirm.
 */
#undef FILE_CNT
#if 0 == 1
# define FILE_CNT(fp) ((fp)->_IO_read_end - (fp)->_IO_read_ptr)
#endif
#if 0 == 2
# define FILE_CNT(fp) ((fp)->_r)
#endif
#if ( defined(_MSC_VER) && (_MSC_VER < 1900) ) && 3 == 3
# define FILE_CNT(fp) ((fp)->_cnt)
#endif
/*
 * Path syntax helpers.
 *
 * With cygwin, mingw32 or MSVC both '/' and '\\' count as directory
 * separators and PATH_SEPARATOR is ';'; otherwise only '/' is a directory
 * separator and PATH_SEPARATOR is ':'.  DIR_SEPARATOR is '/' in both cases.
 *
 * The argument of IS_DIR_SEPARATOR is parenthesized so that compound
 * expressions (e.g. a conditional expression) are compared as a whole.
 * Note that the two-separator variant still evaluates its argument twice,
 * so arguments with side effects must be avoided.
 */
#if defined(cygwin) || defined(mingw32) || defined(_MSC_VER)
# define IS_DIR_SEPARATOR(x) (((x)=='/')||((x)=='\\'))
# define DIR_SEPARATOR '/'
# define PATH_SEPARATOR ';'
#else
# define IS_DIR_SEPARATOR(x) ((x)=='/')
# define DIR_SEPARATOR '/'
# define PATH_SEPARATOR ':'
#endif
#define ECL_ARCHITECTURE "@ARCHITECTURE@"
/*
 * Floating point exception detection.  When ECL_AVOID_FPE_H is defined,
 * ecl_detect_fpe() expands to nothing; otherwise the configured FPE code
 * is included.  The code is included exactly once, inside this guard: a
 * further unconditional include would pull it in even when ECL_AVOID_FPE_H
 * asks for it to be left out.
 */
#ifdef ECL_AVOID_FPE_H
# define ecl_detect_fpe()
#else
# include "@ECL_FPE_CODE@"
#endif
/* Map the unprefixed function names onto their underscore-prefixed equivalents. */
#define strcasecmp _stricmp
/* isnan is only remapped for _MSC_VER below 1800. */
#if defined(_MSC_VER) && (_MSC_VER < 1800)
#define isnan _isnan
#endif
#define finite _finite
#define sleep _sleep
#include <math.h>
/*
 * Fallbacks used when isfinite is not available as a macro at this point:
 * isfinite() is expressed through finite(), signbit() through _copysign(),
 * and the two ECL_MATHERR_* hooks are defined to expand to nothing.
 */
#ifndef isfinite
# define isfinite(x) (finite(x))
# define signbit(x) (_copysign(1.0,(x)) < 0)
# define ECL_MATHERR_CLEAR
# define ECL_MATHERR_TEST
#endif
#if defined(HAVE_LIBFFI) && defined(ECL_INCLUDE_FFI_H)
#include "@ECL_LIBFFI_HEADER@"
#endif
/*
 * ECL_WINDOWS_BACKTRACE takes the substituted @ECL_USE_DBGHELP@ value and is
 * undefined again when that value is zero (presumably so that the rest of
 * the code can test the feature with #ifdef -- confirm).
 */
#define ECL_WINDOWS_BACKTRACE @ECL_USE_DBGHELP@
#if !ECL_WINDOWS_BACKTRACE
#undef ECL_WINDOWS_BACKTRACE
#endif
#if defined(HAVE_SYS_RESOURCE_H) && defined(RLIMIT_STACK) && !defined(NACL)
#define ECL_DEFAULT_C_STACK_SIZE 0 /* Use the stack size provided by the OS */
#else
#define ECL_DEFAULT_C_STACK_SIZE @ECL_DEFAULT_C_STACK_SIZE@
#endif

View file

@ -1,241 +0,0 @@
/*
* Copyright (c) 1990, Giuseppe Attardi.
* Copyright (c) 2001, Juan Jose Garcia Ripoll.
*
* See file 'LICENSE' for the copyright details.
*
*/
/* config.h.in -- Template configuration file. */
#define ECL_MS_WINDOWS_HOST
/*
* If ECL_API has been predefined, that means we are building the core
* library and, under windows, we must tell the compiler to export
* extern functions from the shared library.
* If ECL_API is not defined, we are simply building an application that
* uses ECL and, under windows, we must tell the compiler that certain
* functions will be imported from a DLL.
*/
/* Decoration applied to symbols that cross the shared library boundary. */
#if !(defined(mingw32) || defined(_MSC_VER) || defined(cygwin))
/* No decoration is used here: both macros expand to nothing. */
# define ECL_DLLEXPORT
# undef ECL_API
# define ECL_API
#else
# define ECL_DLLEXPORT __declspec(dllexport)
# ifndef ECL_API
/* ECL_API was not predefined: declare the symbols as imported. */
#  define ECL_API __declspec(dllimport)
# else
/* ECL_API was predefined: replace it with the export attribute. */
#  undef ECL_API
#  define ECL_API __declspec(dllexport)
# endif
#endif
/* Decimal number made with the formula yymmvv */
#define ECL_VERSION_NUMBER @ECL_VERSION_NUMBER@
/*
* FEATURES LINKED IN
*/
/* Always use CLOS */
#define CLOS
/* Use GNU Multiple Precision library for bignums */
#define WITH_GMP 1
#include <ecl/gmp.h>
/* Userland threads? */
/*
 * ECL_THREADS takes the substituted @ECL_THREADS@ value.  A non-zero value
 * additionally defines GC_WIN32_THREADS, GC_THREADS and ECL_WINDOWS_THREADS;
 * a zero value leaves ECL_THREADS undefined.
 */
#define ECL_THREADS @ECL_THREADS@
#if ECL_THREADS
# define GC_WIN32_THREADS
# define GC_THREADS
# define ECL_WINDOWS_THREADS
#else
# undef ECL_THREADS
#endif
/* Use Windows's interlocked operations to define compare-and-swap */
#define AO_ASSUME_WINDOWS98
/* Use Boehm's garbage collector */
#define GBC_BOEHM 0
/*
 * NOTE: the test below is #ifdef, not #if, so it is satisfied even though
 * GBC_BOEHM has the value 0.  The GC_WIN32_THREADS / GC_THREADS macros are
 * therefore defined here whether or not ECL_THREADS is enabled.
 */
#ifdef GBC_BOEHM
# define GC_WIN32_THREADS /* For older versions */
# define GC_THREADS /* For >= 7.2 */
# include <ecl/gc/gc.h>
# define ECL_DYNAMIC_VV
#endif
#define ECL_LIBATOMIC_OPS_H
/* Network streams */
#define ECL_TCP 1
#if defined(ECL_TCP) && (defined(_MSC_VER) || defined(mingw32))
# define ECL_WSOCK
#endif
/* Foreign functions interface */
#define ECL_FFI
#define HAVE_WCHAR_H
/* Support for Unicode strings */
#define ECL_UNICODE @ECL_UNICODE@
#if !ECL_UNICODE
# undef ECL_UNICODE
#endif
/* Allow STREAM operations to work on arbitrary objects */
#define ECL_CLOS_STREAMS 1
/* Stack grows downwards */
#define ECL_DOWN_STACK 1
/* We have libffi and can use it */
/*#undef HAVE_LIBFFI*/
/* Use mprotect for fast interrupt dispatch */
/* #undef ECL_USE_MPROTECT */
#if defined(_MSC_VER) || defined(mingw32)
# define ECL_USE_GUARD_PAGE
#endif
/* Integer types */
/*
 * Fixed-width integer types spelled with the compiler's built-in types.
 * ecl_int8_t is written as "signed char" rather than plain "char": the
 * signedness of plain char is implementation-defined, while this type has
 * to be able to hold negative values.  This also matches the int8_t
 * fallback typedef used further down in this header.
 */
#define ecl_uint8_t unsigned char
#define ecl_int8_t signed char
#define ecl_uint16_t unsigned short
#define ecl_int16_t short
#define ecl_uint32_t unsigned int
#define ecl_int32_t int
#define ecl_uint64_t unsigned __int64
#define ecl_int64_t __int64
#define ecl_long_long_t long long
#define ecl_ulong_long_t unsigned long long
/*
* C TYPES AND SYSTEM LIMITS
*/
/*
* The integer type
*
* cl_fixnum must be an integer type, large enough to hold a pointer.
* Ideally, according to the ISOC99 standard, we should use intptr_t,
* but the required headers are not present in all systems. Hence we
* use autoconf to guess the following values.
*/
/*
 * When _M_X64 is defined the 64-bit layout is used: int and long are both
 * declared as 32 bits wide, so cl_fixnum, cl_index and cl_hashkey are based
 * on long long.  Otherwise the 32-bit layout based on int is used.
 * The fixnum bounds are 2^61-1 / -2^61 and 2^29-1 / -2^29 respectively,
 * i.e. two bits narrower than a signed integer of ECL_FIXNUM_BITS bits
 * (presumably because those bits are reserved for tagging -- confirm).
 */
#ifdef _M_X64
#define ECL_INT_BITS 32
#define ECL_LONG_BITS 32
#define ECL_FIXNUM_BITS 64
#define MOST_POSITIVE_FIXNUM ((cl_fixnum)2305843009213693951LL)
#define MOST_NEGATIVE_FIXNUM ((cl_fixnum)-2305843009213693952LL)
typedef long long cl_fixnum;
typedef unsigned long long cl_index;
typedef unsigned long long cl_hashkey;
#else
#define ECL_INT_BITS 32
#define ECL_LONG_BITS 32
#define ECL_FIXNUM_BITS 32
#define MOST_POSITIVE_FIXNUM ((cl_fixnum)536870911)
#define MOST_NEGATIVE_FIXNUM ((cl_fixnum)-536870912)
typedef int cl_fixnum;
typedef unsigned int cl_index;
typedef unsigned int cl_hashkey;
#endif
#define ECL_BIGNUM_REGISTER_NUMBER 3
/*
* The character type
*/
/* Upper bound (exclusive) on character codes; it depends on whether
 * ECL_UNICODE is enabled. */
#ifdef ECL_UNICODE
#define ECL_CHAR_CODE_LIMIT 1114112 /* unicode character code limit */
#else
#define ECL_CHAR_CODE_LIMIT 256 /* 8-bit (non-unicode) character code limit */
#endif
/* ecl_character holds any character code; ecl_base_char is a single byte. */
typedef int ecl_character;
typedef unsigned char ecl_base_char;
/*
* Array limits
*/
#define ECL_ARRAY_RANK_LIMIT 64
/*
 * Largest array dimension: MOST_POSITIVE_FIXNUM when GBC_BOEHM is defined,
 * otherwise 16*1024*1024.  The total-size limit is the same value.
 */
#ifdef GBC_BOEHM
#define ECL_ARRAY_DIMENSION_LIMIT MOST_POSITIVE_FIXNUM
#else
/* Parenthesized so the product stays intact inside larger expressions
 * such as "n % ECL_ARRAY_DIMENSION_LIMIT". */
#define ECL_ARRAY_DIMENSION_LIMIT (16*1024*1024)
#endif
#define ECL_ARRAY_TOTAL_LIMIT ECL_ARRAY_DIMENSION_LIMIT
/*
* Function limits.
*
* In general, any of these limits must fit in a "signed int".
*/
/* Maximum number of function arguments */
#define ECL_CALL_ARGUMENTS_LIMIT 65536
/* Maximum number of required arguments */
#define ECL_LAMBDA_PARAMETERS_LIMIT ECL_CALL_ARGUMENTS_LIMIT
/* Numb. of args. which can be passed using the C stack */
/* See cmplam.lsp if you change this value */
#define ECL_C_ARGUMENTS_LIMIT 63
/* Maximum number of output arguments */
#define ECL_MULTIPLE_VALUES_LIMIT 64
/* A setjmp that does not save signals */
#define ecl_setjmp setjmp
#define ecl_longjmp longjmp
/*
* Structure/Instance limits. The index to a slot must fit in the
* "int" type. We also require ECL_SLOTS_LIMIT <= CALL_ARGUMENTS_LIMIT
* because constructors typically require as many arguments as slots,
* or more.
*/
#define ECL_SLOTS_LIMIT 32768
/* compiler understands long double */
#define ECL_LONG_FLOAT
/* compiler understands complex */
/* #undef ECL_COMPLEX_FLOAT */
/* do we want NaNs and Infs */
#define ECL_IEEE_FP 1
/* can manipulate floating point environment */
/* #undef HAVE_FENV_H */
/* can activate individual traps in floating point environment */
/* #undef HAVE_FEENABLEEXCEPT */
/* do we want to deactivate all support for floating point exceptions */
/* #undef ECL_AVOID_FPE_H */
/* Missing integer types */
/*
 * For _MSC_VER below 1600 the fixed-width types are declared by hand;
 * every other compiler gets them from <stdint.h>.
 * The test names _MSC_VER explicitly: an undefined macro evaluates to 0
 * inside #if, which would otherwise send every compiler that does not
 * define _MSC_VER down the hand-written branch.
 */
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
#else
#include <stdint.h>
#endif
/* We can use small, two-words conses, without type information */
/* #undef ECL_SMALL_CONS */
#define ECL_INLINE
#define ecl_likely(form) (form)
#define ecl_unlikely(form) (form)
#define ecl_attr_noreturn
/*
 * SSE2 support is only considered when the compiler itself targets SSE2
 * (__SSE2__ defined, or _M_IX86_FP >= 2).  Inside that guard ECL_SSE2 takes
 * the substituted @ECL_SSE2@ value and is undefined again when that value
 * is zero.
 */
#if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
#define ECL_SSE2 @ECL_SSE2@
#if !ECL_SSE2
#undef ECL_SSE2
#endif
#endif

View file

@ -1,57 +0,0 @@
# Makefile for Windows NT. Assumes Microsoft compiler.
# DLLs are included in the root set under NT, but not under win32S.
# Use "nmake nodebug=1 all" for optimized versions of library, gctest and editor.
# MY_CPU=i386 for 32 bits or AMD64 for 64 bits
CPU=$(MY_CPU)
#!include <ntwin32.mak>
srcdir=..\..\src\bdwgc
# Thread support is selected by giving ECL_THREADS any non-empty value.
# Both variants pass -D_CRT_SECURE_NO_WARNINGS -DGC_DLL -DGC_BUILD and
# -DLARGE_CONFIG; the threaded variant adds -DGC_WIN32_THREADS and
# -DTHREAD_LOCAL_ALLOC.
# NOTE(review): THREADS_OBJ is not referenced by any rule in this file; OBJS
# lists win32_threads.obj and thread_local_alloc.obj unconditionally.
!if "$(ECL_THREADS)" != ""
THREADS_OBJ= win32_threads.obj thread_local_alloc.obj
THREADS_FLAGS= -D_CRT_SECURE_NO_WARNINGS -DGC_DLL -DGC_BUILD -DGC_WIN32_THREADS -DTHREAD_LOCAL_ALLOC -DLARGE_CONFIG
!else
THREADS_OBJ=
THREADS_FLAGS= -D_CRT_SECURE_NO_WARNINGS -DGC_DLL -DGC_BUILD -DLARGE_CONFIG
!endif
# Make sure that .cc is not viewed as a suffix. It is for VC++2005, but
# not earlier versions. We can deal with either, but not inconsistency.
.SUFFIXES:
.SUFFIXES: .obj .cpp .c
# Atomic_ops installation directory. For win32, the source directory
# should do, since we only need the headers.
# We assume this was manually unpacked, since I'm not sure there is
# a Windows standard command line tool to do this.
AO_SRC_DIR=$(srcdir)/libatomic_ops/src
AO_INCLUDE_DIR=$(AO_SRC_DIR)
OBJS= alloc.obj reclaim.obj allchblk.obj misc.obj mach_dep.obj os_dep.obj mark_rts.obj headers.obj mark.obj obj_map.obj blacklst.obj finalize.obj new_hblk.obj dbg_mlc.obj fnlz_mlc.obj malloc.obj dyn_load.obj typd_mlc.obj ptr_chck.obj gc_cpp.obj mallocx.obj win32_threads.obj msvc_dbg.obj thread_local_alloc.obj
all: gc.lib
{$(srcdir)\extra}.c{}.obj:
$(CC) -nologo -c -DWIN32 -D_MT $(CFLAGS_CONFIG) -Iinclude -I$(AO_INCLUDE_DIR) $(THREADS_FLAGS) -I$(srcdir)\include $< /Fo$*.obj
{$(srcdir)}.c{}.obj:
$(CC) -nologo -c -DWIN32 -D_MT $(CFLAGS_CONFIG) -Iinclude -I$(AO_INCLUDE_DIR) $(THREADS_FLAGS) -I$(srcdir)\include $< /Fo$*.obj
{$(srcdir)}.cpp{}.obj:
$(CC) -nologo -c -DWIN32 -D_MT $(CFLAGS_CONFIG) -Iinclude -I$(AO_INCLUDE_DIR) $(THREADS_FLAGS) -I$(srcdir)\include $< /Fo$*.obj
gc.lib: $(OBJS)
!if "$(CPU)" == "i386"
lib /MACHINE:i386 /out:gc.lib $(OBJS)
!else
lib /MACHINE:X64 /out:gc.lib $(OBJS)
!endif
gc_cpp.cpp: $(srcdir)\gc_cpp.cc
copy $? $@
# Remove everything this makefile produces: the object files, the library,
# the gc_cpp.cpp copy generated from gc_cpp.cc, and debugger databases.
# The leading '-' makes nmake carry on when a file is already absent.
clean:
	-erase $(OBJS)
	-erase gc.lib
	-erase gc_cpp.cpp
	-erase *.pdb

View file

@ -1,670 +0,0 @@
srcdir = ..\..\src\gmp
#
# Change for other configurations
#
# Target selection.  BITS=64 picks the win64 assembler target, defines PIC
# for the assembler, adds the WIN64 preprocessor define and, unless MPN_TYPE
# was given, selects the amd64 mpn sources.  Any other value of BITS is
# treated as a 32-bit build whose default MPN_TYPE is p3.
!if "$(BITS)" == "64"
YASM_TARGET=win64
YASM_FLAGS=-D PIC
D_WIN64=/D "WIN64"
!if "$(MPN_TYPE)" == ""
MPN_TYPE=amd64
!endif
BITS=64
!else
YASM_TARGET=win32
YASM_FLAGS=
D_WIN64=
BITS=32
!if "$(MPN_TYPE)" == ""
MPN_TYPE=p3
!endif
!endif
# Assembler executable; falls back to the yasm 1.2.0 win32 binary name when
# YASM is not set.
!if "$(YASM)" == ""
YASM=yasm-1.2.0-win32.exe
!endif
# for amd
MPN_AMD64_C_SOURCES = \
mpn\generic\add.c \
mpn\generic\add_1.c \
mpn\generic\add_n.c \
mpn\generic\addmul_1.c \
mpn\generic\addsub_n.c \
mpn\generic\bdivmod.c \
mpn\generic\cmp.c \
mpn\generic\dc_divrem_n.c \
mpn\generic\dive_1.c \
mpn\generic\diveby3.c \
mpn\generic\divis.c \
mpn\generic\divrem.c \
mpn\generic\divrem_1.c \
mpn\generic\divrem_2.c \
mpn\generic\dump.c \
mpn\generic\fib2_ui.c \
mpn\generic\gcd.c \
mpn\generic\gcd_1.c \
mpn\generic\gcdext.c \
mpn\generic\get_d.c \
mpn\generic\get_str.c \
mpn\generic\jacbase.c \
mpn\generic\lshift.c \
mpn\generic\mod_1.c \
mpn\generic\mod_34lsub1.c \
mpn\generic\mode1o.c \
mpn\generic\mul.c \
mpn\generic\mul_1.c \
mpn\generic\mul_basecase.c \
mpn\generic\mul_fft.c \
mpn\generic\mul_n.c \
mpn\generic\mullow_basecase.c \
mpn\generic\mullow_n.c \
mpn\generic\perfsqr.c \
.\mpn\generic\popham2.c \
mpn\generic\pow_1.c \
mpn\generic\pre_divrem_1.c \
mpn\generic\pre_mod_1.c \
mpn\generic\random.c \
mpn\generic\random2.c \
mpn\generic\rootrem.c \
mpn\generic\rshift.c \
mpn\generic\sb_divrem_mn.c \
mpn\generic\scan0.c \
mpn\generic\scan1.c \
mpn\generic\set_str.c \
mpn\generic\sizeinbase.c \
mpn\generic\sqr_basecase.c \
mpn\generic\sqrtrem.c \
mpn\generic\sub.c \
mpn\generic\sub_1.c \
mpn\generic\sub_n.c \
mpn\generic\submul_1.c \
mpn\generic\tdiv_qr.c
# for p4
MPN_P4_C_SOURCES = \
mpn\generic\add.c \
mpn\generic\add_1.c \
mpn\generic\addsub_n.c \
mpn\generic\bdivmod.c \
mpn\generic\cmp.c \
mpn\generic\dc_divrem_n.c \
mpn\generic\diveby3.c \
mpn\generic\divis.c \
mpn\generic\divrem.c \
mpn\generic\divrem_2.c \
mpn\generic\dump.c \
mpn\generic\fib2_ui.c \
mpn\generic\gcd.c \
mpn\generic\gcd_1.c \
mpn\generic\gcdext.c \
mpn\generic\get_d.c \
mpn\generic\get_str.c \
mpn\generic\jacbase.c \
mpn\generic\mul.c \
mpn\generic\mul_fft.c \
mpn\generic\mul_n.c \
mpn\generic\mullow_basecase.c \
mpn\generic\mullow_n.c \
mpn\generic\perfsqr.c \
mpn\generic\pow_1.c \
mpn\generic\pre_divrem_1.c \
mpn\generic\pre_mod_1.c \
mpn\generic\random.c \
mpn\generic\random2.c \
mpn\generic\rootrem.c \
mpn\generic\sb_divrem_mn.c \
mpn\generic\scan0.c \
mpn\generic\scan1.c \
mpn\generic\set_str.c \
mpn\generic\sizeinbase.c \
mpn\generic\sqrtrem.c \
mpn\generic\sub.c \
mpn\generic\sub_1.c \
mpn\generic\tdiv_qr.c
# for p3
MPN_P3_C_SOURCES = \
mpn\generic\add.c \
mpn\generic\add_1.c \
mpn\generic\addsub_n.c \
mpn\generic\bdivmod.c \
mpn\generic\cmp.c \
mpn\generic\dc_divrem_n.c \
mpn\generic\diveby3.c \
mpn\generic\divis.c \
mpn\generic\divrem.c \
mpn\generic\divrem_2.c \
mpn\generic\dump.c \
mpn\generic\fib2_ui.c \
mpn\generic\gcd.c \
mpn\generic\gcd_1.c \
mpn\generic\gcdext.c \
mpn\generic\get_d.c \
mpn\generic\get_str.c \
mpn\generic\jacbase.c \
mpn\generic\mul.c \
mpn\generic\mul_fft.c \
mpn\generic\mul_n.c \
mpn\generic\mullow_basecase.c \
mpn\generic\mullow_n.c \
mpn\generic\perfsqr.c \
mpn\generic\pow_1.c \
mpn\generic\random.c \
mpn\generic\random2.c \
mpn\generic\rootrem.c \
mpn\generic\sb_divrem_mn.c \
mpn\generic\scan0.c \
mpn\generic\scan1.c \
mpn\generic\set_str.c \
mpn\generic\sizeinbase.c \
mpn\generic\sqrtrem.c \
mpn\generic\sub.c \
mpn\generic\sub_1.c \
mpn\generic\tdiv_qr.c
MPN_GC_SOURCES = \
mpn\generic\add.c \
mpn\generic\add_1.c \
mpn\generic\add_n.c \
mpn\generic\addmul_1.c \
.\mpn\generic\addsub_n.c \
mpn\generic\bdivmod.c \
mpn\generic\cmp.c \
mpn\generic\dc_divrem_n.c \
mpn\generic\dive_1.c \
mpn\generic\diveby3.c \
mpn\generic\divis.c \
mpn\generic\divrem.c \
.\mpn\generic\divrem_1.c \
.\mpn\generic\divrem_2.c \
mpn\generic\dump.c \
mpn\generic\fib2_ui.c \
mpn\generic\gcd.c \
mpn\generic\gcd_1.c \
mpn\generic\gcdext.c \
mpn\generic\get_d.c \
mpn\generic\get_str.c \
mpn\generic\jacbase.c \
mpn\generic\lshift.c \
mpn\generic\mod_1.c \
mpn\generic\mod_34lsub1.c \
mpn\generic\mode1o.c \
mpn\generic\mul.c \
mpn\generic\mul_1.c \
mpn\generic\mul_basecase.c \
mpn\generic\mul_fft.c \
mpn\generic\mul_n.c \
mpn\generic\mullow_basecase.c \
mpn\generic\mullow_n.c \
mpn\generic\perfsqr.c \
.\mpn\generic\popham2.c \
mpn\generic\pow_1.c \
mpn\generic\pre_divrem_1.c \
mpn\generic\pre_mod_1.c \
mpn\generic\random.c \
mpn\generic\random2.c \
mpn\generic\rootrem.c \
mpn\generic\rshift.c \
mpn\generic\sb_divrem_mn.c \
mpn\generic\scan0.c \
mpn\generic\scan1.c \
mpn\generic\set_str.c \
mpn\generic\sizeinbase.c \
mpn\generic\sqr_basecase.c \
mpn\generic\sqrtrem.c \
mpn\generic\sub.c \
mpn\generic\sub_1.c \
mpn\generic\sub_n.c \
mpn\generic\submul_1.c \
mpn\generic\tdiv_qr.c
# Sources of the mpf (floating point) layer.
# The last entry must NOT end in a backslash: a trailing continuation would
# splice whatever follows the list (the MPZ_SOURCES definition) into this
# macro.
# NOTE(review): MPF_SOURCES is not referenced by any of the object lists
# defined further down in this makefile.
MPF_SOURCES = \
	mpf\abs.c \
	mpf\add.c \
	mpf\add_ui.c \
	mpf\ceilfloor.c \
	mpf\clear.c \
	mpf\cmp.c \
	mpf\cmp_d.c \
	mpf\cmp_si.c \
	mpf\cmp_ui.c \
	mpf\div.c \
	mpf\div_2exp.c \
	mpf\div_ui.c \
	mpf\dump.c \
	mpf\eq.c \
	mpf\fits_sint.c \
	mpf\fits_slong.c \
	mpf\fits_sshort.c \
	mpf\fits_uint.c \
	mpf\fits_ulong.c \
	mpf\fits_ushort.c \
	mpf\get_d.c \
	mpf\get_d_2exp.c \
	mpf\get_dfl_prec.c \
	mpf\get_prc.c \
	mpf\get_si.c \
	mpf\get_str.c \
	mpf\get_ui.c \
	mpf\init.c \
	mpf\init2.c \
	mpf\inp_str.c \
	mpf\int_p.c \
	mpf\iset.c \
	mpf\iset_d.c \
	mpf\iset_si.c \
	mpf\iset_str.c \
	mpf\iset_ui.c \
	mpf\mul.c \
	mpf\mul_2exp.c \
	mpf\mul_ui.c \
	mpf\neg.c \
	mpf\out_str.c \
	mpf\pow_ui.c \
	mpf\random2.c \
	mpf\reldiff.c \
	mpf\set.c \
	mpf\set_d.c \
	mpf\set_dfl_prec.c \
	mpf\set_prc.c \
	mpf\set_prc_raw.c \
	mpf\set_q.c \
	mpf\set_si.c \
	mpf\set_str.c \
	mpf\set_ui.c \
	mpf\set_z.c \
	mpf\size.c \
	mpf\sqrt.c \
	mpf\sqrt_ui.c \
	mpf\sub.c \
	mpf\sub_ui.c \
	mpf\swap.c \
	mpf\trunc.c \
	mpf\ui_div.c \
	mpf\ui_sub.c \
	mpf\urandomb.c
MPZ_SOURCES = \
mpz\abs.c \
mpz\add.c \
mpz\add_ui.c \
mpz\and.c \
mpz\aorsmul.c \
mpz\aorsmul_i.c \
mpz\array_init.c \
mpz\bin_ui.c \
mpz\bin_uiui.c \
mpz\cdiv_q.c \
mpz\cdiv_q_ui.c \
mpz\cdiv_qr.c \
mpz\cdiv_qr_ui.c \
mpz\cdiv_r.c \
mpz\cdiv_r_ui.c \
mpz\cdiv_ui.c \
mpz\cfdiv_q_2exp.c \
mpz\cfdiv_r_2exp.c \
mpz\clear.c \
mpz\clrbit.c \
mpz\cmp.c \
mpz\cmp_d.c \
mpz\cmp_si.c \
mpz\cmp_ui.c \
mpz\cmpabs.c \
mpz\cmpabs_d.c \
mpz\cmpabs_ui.c \
mpz\com.c \
mpz\combit.c \
mpz\cong.c \
mpz\cong_2exp.c \
mpz\cong_ui.c \
mpz\dive_ui.c \
mpz\divegcd.c \
mpz\divexact.c \
mpz\divis.c \
mpz\divis_2exp.c \
mpz\divis_ui.c \
mpz\dump.c \
mpz\export.c \
mpz\fac_ui.c \
mpz\fdiv_q.c \
mpz\fdiv_q_ui.c \
mpz\fdiv_qr.c \
mpz\fdiv_qr_ui.c \
mpz\fdiv_r.c \
mpz\fdiv_r_ui.c \
mpz\fdiv_ui.c \
mpz\fib2_ui.c \
mpz\fib_ui.c \
mpz\fits_sint.c \
mpz\fits_slong.c \
mpz\fits_sshort.c \
mpz\fits_uint.c \
mpz\fits_ulong.c \
mpz\fits_ushort.c \
mpz\gcd.c \
mpz\gcd_ui.c \
mpz\gcdext.c \
mpz\get_d.c \
mpz\get_d_2exp.c \
mpz\get_si.c \
mpz\get_str.c \
mpz\get_ui.c \
mpz\getlimbn.c \
mpz\hamdist.c \
mpz\import.c \
mpz\init.c \
mpz\init2.c \
mpz\inp_raw.c \
mpz\inp_str.c \
mpz\invert.c \
mpz\ior.c \
mpz\iset.c \
mpz\iset_d.c \
mpz\iset_si.c \
mpz\iset_str.c \
mpz\iset_ui.c \
mpz\lcm.c \
mpz\lcm_ui.c \
mpz\lucnum2_ui.c \
mpz\lucnum_ui.c \
mpz\millerrabin.c \
mpz\mod.c \
mpz\mul.c \
mpz\mul_2exp.c \
mpz\mul_si.c \
mpz\mul_ui.c \
mpz\n_pow_ui.c \
mpz\neg.c \
mpz\nextprime.c \
mpz\out_raw.c \
mpz\out_str.c \
mpz\perfpow.c \
mpz\perfsqr.c \
mpz\popcount.c \
mpz\pow_ui.c \
mpz\powm.c \
mpz\powm_ui.c \
mpz\pprime_p.c \
mpz\random.c \
mpz\random2.c \
mpz\realloc.c \
mpz\realloc2.c \
mpz\remove.c \
mpz\root.c \
mpz\rootrem.c \
mpz\rrandomb.c \
mpz\scan0.c \
mpz\scan1.c \
mpz\set.c \
.\mpz\set_d.c \
mpz\set_f.c \
mpz\set_q.c \
mpz\set_si.c \
mpz\set_str.c \
mpz\set_ui.c \
mpz\setbit.c \
mpz\size.c \
mpz\sizeinbase.c \
mpz\sqrt.c \
mpz\sqrtrem.c \
mpz\sub.c \
mpz\sub_ui.c \
mpz\swap.c \
mpz\tdiv_q.c \
mpz\tdiv_q_2exp.c \
mpz\tdiv_q_ui.c \
mpz\tdiv_qr.c \
mpz\tdiv_qr_ui.c \
mpz\tdiv_r.c \
mpz\tdiv_r_2exp.c \
mpz\tdiv_r_ui.c \
mpz\tdiv_ui.c \
mpz\tstbit.c \
mpz\ui_pow_ui.c \
mpz\ui_sub.c \
mpz\urandomb.c \
mpz\urandomm.c \
mpz\xor.c
MPQ_SOURCES = \
mpq\abs.c \
mpq\aors.c \
mpq\canonicalize.c \
mpq\clear.c \
mpq\cmp.c \
mpq\cmp_si.c \
mpq\cmp_ui.c \
mpq\div.c \
mpq\equal.c \
mpq\get_d.c \
mpq\get_den.c \
mpq\get_num.c \
mpq\get_str.c \
mpq\init.c \
mpq\inp_str.c \
mpq\inv.c \
mpq\md_2exp.c \
mpq\mul.c \
mpq\neg.c \
mpq\out_str.c \
mpq\set.c \
mpq\set_d.c \
mpq\set_den.c \
mpq\set_f.c \
mpq\set_num.c \
mpq\set_si.c \
mpq\set_str.c \
mpq\set_ui.c \
mpq\set_z.c \
mpq\swap.c
GMP_SOURCES = \
assert.c \
compat.c \
errno.c \
extract-dbl.c \
invalid.c \
memory.c \
mp_bpl.c \
mp_clz_tab.c \
mp_dv_tab.c \
mp_get_fns.c \
mp_minv_tab.c \
mp_set_fns.c \
rand.c \
randbui.c \
randclr.c \
randdef.c \
randiset.c \
randlc2s.c \
randlc2x.c \
randmt.c \
randmts.c \
randmui.c \
rands.c \
randsd.c \
randsdui.c \
tal-reent.c \
version.c \
mp_bases.c \
fib_table.c
MPN_AMD64_ASM_SOURCES = \
mpn\amd64i\aors_n.asm \
mpn\amd64i\aorsmul_1.asm \
mpn\amd64i\copyd.asm \
mpn\amd64i\copyi.asm \
mpn\amd64i\dive_1.asm \
mpn\amd64i\lshift.asm \
mpn\amd64i\mode1o.asm \
mpn\amd64i\mul_1.asm \
mpn\amd64i\mul_basecase.asm \
mpn\amd64i\rshift.asm \
mpn\amd64i\sqr_basecase.asm
MPN_P4_ASM_SOURCES = \
mpn\x86i\aors_n.asm \
mpn\x86i\aorsmul_1.asm \
mpn\x86i\pentium4\copyd.asm \
mpn\x86i\pentium4\copyi.asm \
mpn\x86i\dive_1.asm \
mpn\x86i\diveby3.asm \
mpn\x86i\divrem_1.asm \
mpn\x86i\lshift.asm \
mpn\x86i\mod_1.asm \
mpn\x86i\mod_34lsub1.asm \
mpn\x86i\mul_1.asm \
mpn\x86i\mul_basecase.asm \
mpn\x86i\rshift.asm \
mpn\x86i\udiv.asm \
mpn\x86i\umul.asm \
mpn\x86i\pentium4\mmx\lshift.asm \
mpn\x86i\pentium4\mmx\popham.asm \
mpn\x86i\pentium4\mmx\rshift.asm \
mpn\x86i\pentium4\sse2\add_n.asm \
mpn\x86i\pentium4\sse2\addmul_1.asm \
mpn\x86i\pentium4\sse2\dive_1.asm \
mpn\x86i\pentium4\sse2\diveby3.asm \
mpn\x86i\pentium4\sse2\mod_34lsub1.asm \
mpn\x86i\pentium4\sse2\mode1o.asm \
mpn\x86i\pentium4\sse2\mul_1.asm \
mpn\x86i\pentium4\sse2\mul_basecase.asm \
mpn\x86i\pentium4\sse2\sqr_basecase.asm \
mpn\x86i\pentium4\sse2\sub_n.asm \
mpn\x86i\pentium4\sse2\submul_1.asm
MPN_P3_ASM_SOURCES = \
mpn\x86i\aors_n.asm \
mpn\x86i\p6\aorsmul_1.asm \
mpn\x86i\p6\copyd.asm \
mpn\x86i\copyi.asm \
mpn\x86i\p6\dive_1.asm \
mpn\x86i\p6\diveby3.asm \
mpn\x86i\divrem_1.asm \
mpn\x86i\lshift.asm \
mpn\x86i\p6\mod_1.asm \
mpn\x86i\mod_34lsub1.asm \
mpn\x86i\p6\mode1o.asm \
mpn\x86i\mul_1.asm \
mpn\x86i\mul_basecase.asm \
mpn\x86i\rshift.asm \
mpn\x86i\p6\sqr_basecase.asm \
mpn\x86i\udiv.asm \
mpn\x86i\umul.asm \
mpn\x86i\p6\mmx\divrem_1.asm \
mpn\x86i\p6\mmx\lshift.asm \
mpn\x86i\p6\mmx\popham.asm \
mpn\x86i\p6\p3mmx\popham.asm \
mpn\x86i\p6\mmx\rshift.asm
# Object lists, derived from the source lists by suffix substitution.
# NOTE(review): MPN_P0_OBJS is commented out, yet the mpn_p0.lib rule still
# names it and the clean target still names MPN_P0_ASM_SOURCES; neither
# macro is defined anywhere in this makefile.
MPN_GC_OBJS = $(MPN_GC_SOURCES:.c=.obj)
#MPN_P0_OBJS = $(MPN_P0_ASM_SOURCES:.asm=.obj) $(MPN_P0_C_SOURCES:.c=.obj)
MPN_P3_OBJS = $(MPN_P3_ASM_SOURCES:.asm=.obj) $(MPN_P3_C_SOURCES:.c=.obj)
MPN_P4_OBJS = $(MPN_P4_ASM_SOURCES:.asm=.obj) $(MPN_P4_C_SOURCES:.c=.obj)
MPN_AMD64_OBJS = $(MPN_AMD64_ASM_SOURCES:.asm=.obj) $(MPN_AMD64_C_SOURCES:.c=.obj) #
MPZ_OBJS = $(MPZ_SOURCES:.c=.obj)
MPQ_OBJS = $(MPQ_SOURCES:.c=.obj)
GMP_OBJS = $(GMP_SOURCES:.c=.obj)
# Objects that go into gmp.lib besides the selected mpn library (mpz, mpq
# and the top-level sources; no mpf objects are listed).
GMP_ALL_OBJS = $(MPZ_OBJS) $(MPQ_OBJS) $(GMP_OBJS)
# Generated headers and tables that the default target lists as prerequisites.
AUTO_FILES = mp_bases.h mp_bases.c fac_ui.h fib_table.h fib_table.c perfsqr.h
all: $(AUTO_FILES) gmp.lib
mpn_gc.lib: msvc-build $(MPN_GC_OBJS)
link -lib /NOLOGO /OUT:$@ $(MPN_GC_OBJS)
mpn_p0.lib: msvc-build $(MPN_P0_OBJS)
link -lib /NOLOGO /OUT:$@ $(MPN_P0_OBJS)
mpn_p3.lib: msvc-build $(MPN_P3_OBJS)
link -lib /NOLOGO /OUT:$@ $(MPN_P3_OBJS)
mpn_p4.lib: msvc-build $(MPN_P4_OBJS)
link -lib /NOLOGO /OUT:$@ $(MPN_P4_OBJS)
mpn_amd64.lib: msvc-build $(MPN_AMD64_OBJS)
link -lib /NOLOGO /OUT:$@ $(MPN_AMD64_OBJS)
gmp.lib: mpn_$(MPN_TYPE).lib $(GMP_ALL_OBJS)
link -lib /NOLOGO /OUT:$@ *.obj mpz\*.obj mpq\*.obj mpn_$(MPN_TYPE).lib
#CFLAGS = $(CFLAGS_CONFIG) /nologo /W3 /EHsc /I "." /I $(srcdir) /I "$(srcdir)\mpn\generic" /D "WIN32" /D "_LIB" /D "_WIN32" /D "_MBCS"
CFLAGS = $(CFLAGS_CONFIG) /nologo /W3 /EHsc /I "." /I $(srcdir) /I "$(srcdir)\mpn\generic" /D "_LIB" /D "_MBCS" $(D_WIN64)
# Inference rules: compile a .c file found in the first directory into an
# .obj placed in the second one.  $< is the inference-rule macro naming the
# source file the rule was matched for; unlike $? it cannot pick up other
# out-of-date dependents of the object, so exactly one file is handed to
# the compiler.
{.\mpn\generic}.c{.\mpn\generic}.obj:
	cl -c $(CFLAGS) /Fo$@ $<
{.\mpz}.c{.\mpz}.obj:
	cl -c $(CFLAGS) /Fo$@ $<
{.\mpq}.c{.\mpq}.obj:
	cl -c $(CFLAGS) /Fo$@ $<
# The same rules for sources that live in the GMP source tree.
{$(srcdir)\mpn\generic}.c{mpn\generic}.obj:
	cl -c $(CFLAGS) /Fo$@ $<
{$(srcdir)\mpn}.c{mpn}.obj:
	cl -c $(CFLAGS) /Fo$@ $<
{$(srcdir)\mpz}.c{mpz}.obj:
	cl -c $(CFLAGS) /Fo$@ $<
{$(srcdir)\mpq}.c{mpq}.obj:
	cl -c $(CFLAGS) /Fo$@ $<
{$(srcdir)}.c{}.obj:
	cl -c $(CFLAGS) /Fo$@ $<
.asm.obj:
# nasm -O9 -Xvc -f win32 -i $(@D)\ -o $@ $?
$(YASM) -O9 $(YASM_FLAGS) -Xvc -f $(YASM_TARGET) -i $(@D)\ -o $@ $?
gen-bases.exe: build.vc8\gen-bases\gen-bases.c
cl $(CFLAGS) /Fe$@ $?
erase gen-bases.obj
mp_bases.h: gen-bases.exe
gen-bases.exe header $(BITS) 0 > $@
mp_bases.c: gen-bases.exe
gen-bases.exe table $(BITS) 0 > $@
gen-fac_ui.exe: build.vc8\gen-fac_ui\gen-fac_ui.c
cl $(CFLAGS) /Fe$@ $?
erase gen-fac_ui.obj
fac_ui.h: gen-fac_ui.exe
gen-fac_ui.exe $(BITS) 0 > $@
gen-fib.exe: build.vc8\gen-fib\gen-fib.c
cl $(CFLAGS) /Fe$@ $?
erase gen-fib.obj
fib_table.h: gen-fib.exe
gen-fib.exe header $(BITS) 0 > $@
fib_table.c: gen-fib.exe
gen-fib.exe table $(BITS) 0 > $@
gen-psqr.exe: build.vc8\gen-psqr\gen-psqr.c
cl $(CFLAGS) /Fe$@ $?
erase gen-psqr.obj
perfsqr.h: gen-psqr.exe
gen-psqr.exe $(BITS) 0 > $@
msvc-build: build.vc8\config-vc8.$(MPN_TYPE) build.vc8\gmp.h
copy build.vc8\config-vc8.$(MPN_TYPE) config.h
copy build.vc8\gmp.h gmp.h
echo > $@
clean:
-erase mpn\generic\*.obj
-erase mpn\amd64i\*.obj
-erase mpn\*.obj
-erase mpz\*.obj
-erase mpq\*.obj
-erase *.obj
-erase mpn_*.lib
-erase gmp.lib
-for %o in ($(MPN_P0_ASM_SOURCES:.asm=.obj)) do erase %o
-for %o in ($(MPN_P3_ASM_SOURCES:.asm=.obj)) do erase %o
-for %o in ($(MPN_P4_ASM_SOURCES:.asm=.obj)) do erase %o
-for %o in ($(AUTO_FILES)) do erase %o
-erase config.h
-erase gmp.h
-erase msvc-build
-erase *.exe *.manifest
-erase *.ilk *.pdb

View file

@ -1,519 +0,0 @@
/* Templates for defines setup by configure.
Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
/* Define one (and only one) of these for the CPU host.
Only hosts that are going to be tested for need to be in this list,
not everything that can possibly be selected.
*/
#undef HAVE_HOST_CPU_alpha
#undef HAVE_HOST_CPU_alphaev5
#undef HAVE_HOST_CPU_alphaev6
#undef HAVE_HOST_CPU_alphaev67
#undef HAVE_HOST_CPU_m68k
#undef HAVE_HOST_CPU_m68000
#undef HAVE_HOST_CPU_m68010
#undef HAVE_HOST_CPU_m68020
#undef HAVE_HOST_CPU_m68030
#undef HAVE_HOST_CPU_m68040
#undef HAVE_HOST_CPU_m68060
#undef HAVE_HOST_CPU_m68302
#undef HAVE_HOST_CPU_m68360
#undef HAVE_HOST_CPU_powerpc604
#undef HAVE_HOST_CPU_powerpc604e
#undef HAVE_HOST_CPU_powerpc750
#undef HAVE_HOST_CPU_powerpc7400
#undef HAVE_HOST_CPU_sparc
#undef HAVE_HOST_CPU_sparcv8
#undef HAVE_HOST_CPU_supersparc
#undef HAVE_HOST_CPU_sparclite
#undef HAVE_HOST_CPU_microsparc
#undef HAVE_HOST_CPU_ultrasparc1
#undef HAVE_HOST_CPU_ultrasparc2
#undef HAVE_HOST_CPU_sparc64
#undef HAVE_HOST_CPU_hppa1_0
#undef HAVE_HOST_CPU_hppa1_1
#undef HAVE_HOST_CPU_hppa2_0n
#undef HAVE_HOST_CPU_hppa2_0w
#undef HAVE_HOST_CPU_i386
#undef HAVE_HOST_CPU_i486
#undef HAVE_HOST_CPU_i586
#undef HAVE_HOST_CPU_i686
#undef HAVE_HOST_CPU_pentium
#undef HAVE_HOST_CPU_pentiummmx
#undef HAVE_HOST_CPU_pentiumpro
#undef HAVE_HOST_CPU_pentium2
#undef HAVE_HOST_CPU_pentium3
#undef HAVE_HOST_CPU_k5
#undef HAVE_HOST_CPU_k6
#undef HAVE_HOST_CPU_k62
#undef HAVE_HOST_CPU_k63
#undef HAVE_HOST_CPU_athlon
/* a dummy to make autoheader happy */
#undef HAVE_HOST_CPU_
/* Define one (and only one) of these for the CPU host family.
Only hosts that are going to be tested for need to be in this list,
not everything that can possibly be selected.
*/
#undef HAVE_HOST_CPU_FAMILY_power
#undef HAVE_HOST_CPU_FAMILY_powerpc
#define HAVE_HOST_CPU_FAMILY_x86 1
/* Define if we have native implementation of function.
(use just one of the three following defines)
*/
#undef HAVE_NATIVE_mpn_add
#undef HAVE_NATIVE_mpn_add_1
#undef HAVE_NATIVE_mpn_addmul_2
#undef HAVE_NATIVE_mpn_addmul_3
#undef HAVE_NATIVE_mpn_addmul_4
#undef HAVE_NATIVE_mpn_addsub_n
#undef HAVE_NATIVE_mpn_addsub_nc
#undef HAVE_NATIVE_mpn_and_n
#undef HAVE_NATIVE_mpn_andn_n
#undef HAVE_NATIVE_mpn_bdivmod
#undef HAVE_NATIVE_mpn_cmp
#undef HAVE_NATIVE_mpn_com_n
#undef HAVE_NATIVE_mpn_divrem
#undef HAVE_NATIVE_mpn_divrem_2
#undef HAVE_NATIVE_mpn_divrem_newton
#undef HAVE_NATIVE_mpn_divrem_classic
#undef HAVE_NATIVE_mpn_dump
#undef HAVE_NATIVE_mpn_gcd
#undef HAVE_NATIVE_mpn_gcd_1
#undef HAVE_NATIVE_mpn_gcd_finda
#undef HAVE_NATIVE_mpn_gcdext
#undef HAVE_NATIVE_mpn_get_str
#undef HAVE_NATIVE_mpn_invert_limb
#undef HAVE_NATIVE_mpn_ior_n
#undef HAVE_NATIVE_mpn_iorn_n
#undef HAVE_NATIVE_mpn_mul
#undef HAVE_NATIVE_mpn_mul_2
#undef HAVE_NATIVE_mpn_mul_3
#undef HAVE_NATIVE_mpn_mul_4
#undef HAVE_NATIVE_mpn_mul_n
#undef HAVE_NATIVE_mpn_nand_n
#undef HAVE_NATIVE_mpn_nior_n
#undef HAVE_NATIVE_mpn_perfect_square_p
#undef HAVE_NATIVE_mpn_preinv_mod_1
#undef HAVE_NATIVE_mpn_random2
#undef HAVE_NATIVE_mpn_random
#undef HAVE_NATIVE_mpn_rawrandom
#undef HAVE_NATIVE_mpn_scan0
#undef HAVE_NATIVE_mpn_scan1
#undef HAVE_NATIVE_mpn_set_str
#undef HAVE_NATIVE_mpn_sqrtrem
#undef HAVE_NATIVE_mpn_sqr_diagonal
#undef HAVE_NATIVE_mpn_sub
#undef HAVE_NATIVE_mpn_sub_1
#undef HAVE_NATIVE_mpn_udiv_w_sdiv
#undef HAVE_NATIVE_mpn_xor_n
#undef HAVE_NATIVE_mpn_xnor_n
#define HAVE_NATIVE_mpn_add_n 1
#define HAVE_NATIVE_mpn_add_nc 1
#define HAVE_NATIVE_mpn_sub_n 1
#define HAVE_NATIVE_mpn_sub_nc 1
#define HAVE_NATIVE_mpn_addmul_1 1
#define HAVE_NATIVE_mpn_addmul_1c 1
#define HAVE_NATIVE_mpn_submul_1 1
#define HAVE_NATIVE_mpn_submul_1c 1
#define HAVE_NATIVE_mpn_copyd 1
#define HAVE_NATIVE_mpn_copyi 1
#define HAVE_NATIVE_mpn_divexact_1 1
#undef HAVE_NATIVE_mpn_divexact_by3c
#undef HAVE_NATIVE_mpn_divrem_1
#undef HAVE_NATIVE_mpn_divrem_1c
#undef HAVE_NATIVE_mpn_hamdist
#undef HAVE_NATIVE_mpn_popcount
#define HAVE_NATIVE_mpn_lshift 1
#define HAVE_NATIVE_mpn_rshift 1
#undef HAVE_NATIVE_mpn_mod_1
#undef HAVE_NATIVE_mpn_mod_1c
#define HAVE_NATIVE_mpn_modexact_1_odd 1
#define HAVE_NATIVE_mpn_modexact_1c_odd 1
#define HAVE_NATIVE_mpn_mul_1 1
#define HAVE_NATIVE_mpn_mul_1c 1
#define HAVE_NATIVE_mpn_mul_basecase 1
#define HAVE_NATIVE_mpn_sqr_basecase 1
#undef HAVE_NATIVE_mpn_umul_ppmm
#undef HAVE_NATIVE_mpn_udiv_qrnnd
/* For the generic C code.  These two lines repeat, with the same value,
   definitions that already appear earlier in this list. */
#define HAVE_NATIVE_mpn_add_n 1
#define HAVE_NATIVE_mpn_sub_n 1
/* a dummy to make autoheader happy */
#undef HAVE_NATIVE_
/* The gmp-mparam.h to update when tuning. */
#undef GMP_MPARAM_H_SUGGEST
/* Define if you have the `alarm' function. */
#undef HAVE_ALARM
/* Define if alloca() works (via gmp-impl.h). */
#define HAVE_ALLOCA 1
/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
#undef HAVE_ALLOCA_H
/* Define if the compiler accepts gcc style __attribute__ ((const)) */
#undef HAVE_ATTRIBUTE_CONST
/* Define if the compiler accepts gcc style __attribute__ ((malloc)) */
#undef HAVE_ATTRIBUTE_MALLOC
/* Define if the compiler accepts gcc style __attribute__ ((mode (XX))) */
#undef HAVE_ATTRIBUTE_MODE
/* Define if the compiler accepts gcc style __attribute__ ((noreturn)) */
#undef HAVE_ATTRIBUTE_NORETURN
/* Define if tests/libtests has calling conventions checking for the CPU */
#undef HAVE_CALLING_CONVENTIONS
/* Define if you have the `clock' function. */
#define HAVE_CLOCK 1
/* Define if you have the `clock_gettime' function. */
#undef HAVE_CLOCK_GETTIME
/* Define if you have the `cputime' function. */
#undef HAVE_CPUTIME
/* Define to 1 if you have the declaration of `fgetc', and to 0 if you don't.
*/
#define HAVE_DECL_FGETC 1
/* Define to 1 if you have the declaration of `fscanf', and to 0 if you don't.
*/
#define HAVE_DECL_FSCANF 1
/* Define to 1 if you have the declaration of `optarg', and to 0 if you don't.
*/
#define HAVE_DECL_OPTARG 0
/* Define to 1 if you have the declaration of `ungetc', and to 0 if you don't.
*/
#define HAVE_DECL_UNGETC 1
/* Define to 1 if you have the declaration of `vfprintf', and to 0 if you
don't. */
#define HAVE_DECL_VFPRINTF 1
/* Define if denormalized floats work. */
#define HAVE_DENORMS 1
/* Define if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define one (and only one) of the following for the format of a `double'.
If your format is not among these choices, or you don't know what it is,
then leave all of them undefined.
"IEEE_LITTLE_SWAPPED" means little endian, but with the two 4-byte halves
swapped, as used by ARM CPUs in little endian mode. */
#undef HAVE_DOUBLE_IEEE_BIG_ENDIAN
#define HAVE_DOUBLE_IEEE_LITTLE_ENDIAN 1
#undef HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
#undef HAVE_DOUBLE_VAX_D
#undef HAVE_DOUBLE_VAX_G
#undef HAVE_DOUBLE_CRAY_CFP
/* Define if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define if you have the <fpu_control.h> header file. */
#undef HAVE_FPU_CONTROL_H
/* Define if you have the `getpagesize' function. */
#undef HAVE_GETPAGESIZE
/* Define if you have the `getrusage' function. */
#undef HAVE_GETRUSAGE
/* Define if you have the `gettimeofday' function. */
#undef HAVE_GETTIMEOFDAY
/* Define if 0/0, 1/0, -1/0 and sqrt(-1) work to generate NaN/infinities. */
#define HAVE_INFS 1
/* Define if the system has the type `intmax_t'. */
#undef HAVE_INTMAX_T
/* Define if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define one (just one) of the following for the endianness of `mp_limb_t'.
If the endianness is not a simple big or little, or you don't know what
it is, then leave both of these undefined. */
#undef HAVE_LIMB_BIG_ENDIAN
#define HAVE_LIMB_LITTLE_ENDIAN 1
/* Presumably: define if the C++ type `std::locale' is available (name follows
   the autoconf type-check convention) -- TODO confirm against configure. */
#define HAVE_STD__LOCALE 1
/* Define if you have the `localeconv' function. */
#define HAVE_LOCALECONV 1
/* Define if you have the <locale.h> header file. */
#define HAVE_LOCALE_H 1
/* Define if the system has the type `long double'. */
#define HAVE_LONG_DOUBLE 1
/* Define if the system has the type `long long'. */
#define HAVE_LONG_LONG 1
/* Define if you have the `lrand48' function. */
#undef HAVE_LRAND48
/* Define if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define if you have the `memset' function. */
#define HAVE_MEMSET 1
/* Define if you have the `mmap' function. */
#undef HAVE_MMAP
/* Define if you have the `mprotect' function. */
#undef HAVE_MPROTECT
/* Define if you have the `obstack_vprintf' function. */
#undef HAVE_OBSTACK_VPRINTF
/* Define if you have the `popen' function. */
#undef HAVE_POPEN
/* Define if you have the `processor_info' function. */
#undef HAVE_PROCESSOR_INFO
/* Define if the system has the type `ptrdiff_t'. */
#define HAVE_PTRDIFF_T 1
/* Define if the system has the type `quad_t'. */
#undef HAVE_QUAD_T
/* Presumably: define if you have the `raise' function -- TODO confirm. */
#define HAVE_RAISE 1
/* Define if you have the `read_real_time' function. */
#undef HAVE_READ_REAL_TIME
/* Define if you have the `sigaction' function. */
#undef HAVE_SIGACTION
/* Define if you have the `sigaltstack' function. */
#undef HAVE_SIGALTSTACK
/* Define if you have the `sigstack' function. */
#undef HAVE_SIGSTACK
/* Tune directory speed_cyclecounter (undef=none, 1=32bits, 2=64bits) */
#define HAVE_SPEED_CYCLECOUNTER 2
/* Define if the system has the type `stack_t'. */
#undef HAVE_STACK_T
/* Define if <stdarg.h> exists and works */
#define HAVE_STDARG 1
/* Define if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define if you have the `strcasecmp' function. */
#undef HAVE_STRCASECMP
/* Define if you have the `strchr' function. */
#define HAVE_STRCHR 1
/* Define if cpp supports the ANSI # stringizing operator. */
#define HAVE_STRINGIZE 1
/* Define if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define if you have the `strnlen' function. */
#define HAVE_STRNLEN 1
/* Define if you have the `strtoul' function. */
#define HAVE_STRTOUL 1
/* Define if you have the `sysconf' function. */
#undef HAVE_SYSCONF
/* Define if you have the `sysctl' function. */
#undef HAVE_SYSCTL
/* Define if you have the `sysctlbyname' function. */
#undef HAVE_SYSCTLBYNAME
/* Define if you have the `syssgi' function. */
#undef HAVE_SYSSGI
/* Define if you have the <sys/mman.h> header file. */
#undef HAVE_SYS_MMAN_H
/* Define if you have the <sys/param.h> header file. */
#undef HAVE_SYS_PARAM_H
/* Define if you have the <sys/processor.h> header file. */
#undef HAVE_SYS_PROCESSOR_H
/* Define if you have the <sys/resource.h> header file. */
#undef HAVE_SYS_RESOURCE_H
/* Define if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define if you have the <sys/sysctl.h> header file. */
#undef HAVE_SYS_SYSCTL_H
/* Define if you have the <sys/syssgi.h> header file. */
#undef HAVE_SYS_SYSSGI_H
/* Define if you have the <sys/systemcfg.h> header file. */
#undef HAVE_SYS_SYSTEMCFG_H
/* Define if you have the <sys/times.h> header file. */
#undef HAVE_SYS_TIMES_H
/* Define if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
/* Define if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define if you have the `times' function. */
#undef HAVE_TIMES
/* Define if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define if you have vsnprintf and it works properly. */
#undef HAVE_VSNPRINTF
/* Assembler local label prefix */
#undef LSYM_PREFIX
/* Define if you have the `fesetround' function via the <fenv.h> header file.
*/
#undef MPFR_HAVE_FESETROUND
/* Presumably: define if you have the C++ <sstream> header file -- TODO
   confirm. */
#define HAVE_SSTREAM 1
/* Name of package */
#define PACKAGE "gmp"
/* Define if compiler has function prototypes */
#define PROTOTYPES 1
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE void
/* The size of a `unsigned long', as computed by sizeof. */
#define SIZEOF_UNSIGNED_LONG 4
/* Define if sscanf requires writable inputs */
#undef SSCANF_WRITABLE_INPUT
/* Define if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
/* Maximum size the tune program can test for KARATSUBA_SQR_THRESHOLD */
#define TUNE_KARATSUBA_SQR_MAX 67
/* Version number of package */
#define VERSION "4.2.1 (VC8)"
/* ./configure --enable-assert option, to enable some ASSERT()s */
#undef WANT_ASSERT
/* ./configure --enable-fft option, to enable FFTs for multiplication */
#define WANT_FFT 1
/* Define to 1 if --enable-profiling=gprof */
#undef WANT_PROFILING_GPROF
/* Define to 1 if --enable-profiling=prof */
#undef WANT_PROFILING_PROF
/* --enable-alloca=yes */
#undef WANT_TMP_ALLOCA
/* --enable-alloca=debug */
#undef WANT_TMP_DEBUG
/* --enable-alloca=malloc-notreentrant */
#undef WANT_TMP_NOTREENTRANT
/* --enable-alloca=malloc-reentrant */
#define WANT_TMP_REENTRANT 1
/* Define if your processor stores words with the most significant byte first
(like Motorola and SPARC, unlike Intel and VAX). */
#undef WORDS_BIGENDIAN
/* Define if `lex' declares `yytext' as a `char *' by default, not a `char[]'.
*/
#undef YYTEXT_POINTER
/* Define as `__inline' if that's what the C compiler calls it, or to nothing
if it is not supported. */
#define inline __inline
/* Define to empty if the keyword `volatile' does not work. Warning: valid
code using `volatile' can become incorrect without. Disable with care. */
#undef volatile
/* Microsoft compiler only: mark strcasecmp/strncasecmp as available and map
   strcasecmp, strncasecmp and alloca onto the underscore-prefixed MSVC CRT
   names. Nothing is defined for any other compiler. */
#if defined(_MSC_VER)
#  define HAVE_STRCASECMP  1
#  define HAVE_STRNCASECMP 1
#  define strcasecmp       _stricmp
#  define strncasecmp      _strnicmp
#  define alloca           _alloca
#endif

View file

@ -1,518 +0,0 @@
/* Templates for defines setup by configure.
Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
/* Define one (and only one) of these for the CPU host.
Only hosts that are going to be tested for need to be in this list,
not everything that can possibly be selected.
*/
#undef HAVE_HOST_CPU_alpha
#undef HAVE_HOST_CPU_alphaev5
#undef HAVE_HOST_CPU_alphaev6
#undef HAVE_HOST_CPU_alphaev67
#undef HAVE_HOST_CPU_m68k
#undef HAVE_HOST_CPU_m68000
#undef HAVE_HOST_CPU_m68010
#undef HAVE_HOST_CPU_m68020
#undef HAVE_HOST_CPU_m68030
#undef HAVE_HOST_CPU_m68040
#undef HAVE_HOST_CPU_m68060
#undef HAVE_HOST_CPU_m68302
#undef HAVE_HOST_CPU_m68360
#undef HAVE_HOST_CPU_powerpc604
#undef HAVE_HOST_CPU_powerpc604e
#undef HAVE_HOST_CPU_powerpc750
#undef HAVE_HOST_CPU_powerpc7400
#undef HAVE_HOST_CPU_sparc
#undef HAVE_HOST_CPU_sparcv8
#undef HAVE_HOST_CPU_supersparc
#undef HAVE_HOST_CPU_sparclite
#undef HAVE_HOST_CPU_microsparc
#undef HAVE_HOST_CPU_ultrasparc1
#undef HAVE_HOST_CPU_ultrasparc2
#undef HAVE_HOST_CPU_sparc64
#undef HAVE_HOST_CPU_hppa1_0
#undef HAVE_HOST_CPU_hppa1_1
#undef HAVE_HOST_CPU_hppa2_0n
#undef HAVE_HOST_CPU_hppa2_0w
#undef HAVE_HOST_CPU_i386
#undef HAVE_HOST_CPU_i486
#undef HAVE_HOST_CPU_i586
#undef HAVE_HOST_CPU_i686
#undef HAVE_HOST_CPU_pentium
#undef HAVE_HOST_CPU_pentiummmx
#undef HAVE_HOST_CPU_pentiumpro
#undef HAVE_HOST_CPU_pentium2
#undef HAVE_HOST_CPU_pentium3
#undef HAVE_HOST_CPU_k5
#undef HAVE_HOST_CPU_k6
#undef HAVE_HOST_CPU_k62
#undef HAVE_HOST_CPU_k63
#undef HAVE_HOST_CPU_athlon
/* a dummy to make autoheader happy */
#undef HAVE_HOST_CPU_
/* Define one (and only one) of these for the CPU host family.
Only hosts that are going to be tested for need to be in this list,
not everything that can possibly be selected.
*/
#undef HAVE_HOST_CPU_FAMILY_power
#undef HAVE_HOST_CPU_FAMILY_powerpc
#define HAVE_HOST_CPU_FAMILY_x86 1
/* Define if we have native implementation of function.
(use just one of the three following defines)
*/
#undef HAVE_NATIVE_mpn_add
#undef HAVE_NATIVE_mpn_add_1
#undef HAVE_NATIVE_mpn_addmul_2
#undef HAVE_NATIVE_mpn_addmul_3
#undef HAVE_NATIVE_mpn_addmul_4
#undef HAVE_NATIVE_mpn_addsub_n
#undef HAVE_NATIVE_mpn_addsub_nc
#undef HAVE_NATIVE_mpn_and_n
#undef HAVE_NATIVE_mpn_andn_n
#undef HAVE_NATIVE_mpn_bdivmod
#undef HAVE_NATIVE_mpn_cmp
#undef HAVE_NATIVE_mpn_com_n
#undef HAVE_NATIVE_mpn_divrem
#undef HAVE_NATIVE_mpn_divrem_2
#undef HAVE_NATIVE_mpn_divrem_newton
#undef HAVE_NATIVE_mpn_divrem_classic
#undef HAVE_NATIVE_mpn_dump
#undef HAVE_NATIVE_mpn_gcd
#undef HAVE_NATIVE_mpn_gcd_1
#undef HAVE_NATIVE_mpn_gcd_finda
#undef HAVE_NATIVE_mpn_gcdext
#undef HAVE_NATIVE_mpn_get_str
#undef HAVE_NATIVE_mpn_invert_limb
#undef HAVE_NATIVE_mpn_ior_n
#undef HAVE_NATIVE_mpn_iorn_n
#undef HAVE_NATIVE_mpn_mul
#undef HAVE_NATIVE_mpn_mul_2
#undef HAVE_NATIVE_mpn_mul_3
#undef HAVE_NATIVE_mpn_mul_4
#undef HAVE_NATIVE_mpn_mul_n
#undef HAVE_NATIVE_mpn_nand_n
#undef HAVE_NATIVE_mpn_nior_n
#undef HAVE_NATIVE_mpn_perfect_square_p
#undef HAVE_NATIVE_mpn_preinv_mod_1
#undef HAVE_NATIVE_mpn_random2
#undef HAVE_NATIVE_mpn_random
#undef HAVE_NATIVE_mpn_rawrandom
#undef HAVE_NATIVE_mpn_scan0
#undef HAVE_NATIVE_mpn_scan1
#undef HAVE_NATIVE_mpn_set_str
#undef HAVE_NATIVE_mpn_sqrtrem
#undef HAVE_NATIVE_mpn_sqr_diagonal
#undef HAVE_NATIVE_mpn_sub
#undef HAVE_NATIVE_mpn_sub_1
#undef HAVE_NATIVE_mpn_udiv_w_sdiv
#undef HAVE_NATIVE_mpn_xor_n
#undef HAVE_NATIVE_mpn_xnor_n
#undef HAVE_NATIVE_mpn_add_n
#undef HAVE_NATIVE_mpn_add_nc
#undef HAVE_NATIVE_mpn_sub_n
#undef HAVE_NATIVE_mpn_sub_nc
#undef HAVE_NATIVE_mpn_addmul_1
#undef HAVE_NATIVE_mpn_addmul_1c
#undef HAVE_NATIVE_mpn_submul_1
#undef HAVE_NATIVE_mpn_submul_1c
#undef HAVE_NATIVE_mpn_copyd
#undef HAVE_NATIVE_mpn_copyi
#undef HAVE_NATIVE_mpn_divexact_1
#undef HAVE_NATIVE_mpn_divexact_by3c
#undef HAVE_NATIVE_mpn_divrem_1
#undef HAVE_NATIVE_mpn_divrem_1c
#undef HAVE_NATIVE_mpn_hamdist
#undef HAVE_NATIVE_mpn_popcount
#undef HAVE_NATIVE_mpn_lshift
#undef HAVE_NATIVE_mpn_rshift
#undef HAVE_NATIVE_mpn_mod_1
#undef HAVE_NATIVE_mpn_mod_1c
#undef HAVE_NATIVE_mpn_modexact_1_odd
#undef HAVE_NATIVE_mpn_modexact_1c_odd
#undef HAVE_NATIVE_mpn_mul_1
#undef HAVE_NATIVE_mpn_mul_1c
#undef HAVE_NATIVE_mpn_mul_basecase
#undef HAVE_NATIVE_mpn_sqr_basecase
#undef HAVE_NATIVE_mpn_umul_ppmm
#undef HAVE_NATIVE_mpn_udiv_qrnnd
/* For the generic C code: both macros appear in the #undef list above, so
   these two lines are what actually leave them defined (to 1) in this
   configuration. */
#define HAVE_NATIVE_mpn_add_n 1
#define HAVE_NATIVE_mpn_sub_n 1
/* a dummy to make autoheader happy */
#undef HAVE_NATIVE_
/* The gmp-mparam.h to update when tuning. */
#undef GMP_MPARAM_H_SUGGEST
/* Define if you have the `alarm' function. */
#undef HAVE_ALARM
/* Define if alloca() works (via gmp-impl.h). */
#define HAVE_ALLOCA 1
/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
#undef HAVE_ALLOCA_H
/* Define if the compiler accepts gcc style __attribute__ ((const)) */
#undef HAVE_ATTRIBUTE_CONST
/* Define if the compiler accepts gcc style __attribute__ ((malloc)) */
#undef HAVE_ATTRIBUTE_MALLOC
/* Define if the compiler accepts gcc style __attribute__ ((mode (XX))) */
#undef HAVE_ATTRIBUTE_MODE
/* Define if the compiler accepts gcc style __attribute__ ((noreturn)) */
#undef HAVE_ATTRIBUTE_NORETURN
/* Define if tests/libtests has calling conventions checking for the CPU */
#undef HAVE_CALLING_CONVENTIONS
/* Define if you have the `clock' function. */
#define HAVE_CLOCK 1
/* Define if you have the `clock_gettime' function. */
#undef HAVE_CLOCK_GETTIME
/* Define if you have the `cputime' function. */
#undef HAVE_CPUTIME
/* Define to 1 if you have the declaration of `fgetc', and to 0 if you don't.
*/
#define HAVE_DECL_FGETC 1
/* Define to 1 if you have the declaration of `fscanf', and to 0 if you don't.
*/
#define HAVE_DECL_FSCANF 1
/* Define to 1 if you have the declaration of `optarg', and to 0 if you don't.
*/
#define HAVE_DECL_OPTARG 0
/* Define to 1 if you have the declaration of `ungetc', and to 0 if you don't.
*/
#define HAVE_DECL_UNGETC 1
/* Define to 1 if you have the declaration of `vfprintf', and to 0 if you
don't. */
#define HAVE_DECL_VFPRINTF 1
/* Define if denormalized floats work. */
#define HAVE_DENORMS 1
/* Define if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define one (and only one) of the following for the format of a `double'.
If your format is not among these choices, or you don't know what it is,
then leave all of them undefined.
"IEEE_LITTLE_SWAPPED" means little endian, but with the two 4-byte halves
swapped, as used by ARM CPUs in little endian mode. */
#undef HAVE_DOUBLE_IEEE_BIG_ENDIAN
#define HAVE_DOUBLE_IEEE_LITTLE_ENDIAN 1
#undef HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
#undef HAVE_DOUBLE_VAX_D
#undef HAVE_DOUBLE_VAX_G
#undef HAVE_DOUBLE_CRAY_CFP
/* Define if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define if you have the <fpu_control.h> header file. */
#undef HAVE_FPU_CONTROL_H
/* Define if you have the `getpagesize' function. */
#undef HAVE_GETPAGESIZE
/* Define if you have the `getrusage' function. */
#undef HAVE_GETRUSAGE
/* Define if you have the `gettimeofday' function. */
#undef HAVE_GETTIMEOFDAY
/* Define if 0/0, 1/0, -1/0 and sqrt(-1) work to generate NaN/infinities. */
#define HAVE_INFS 1
/* Define if the system has the type `intmax_t'. */
#undef HAVE_INTMAX_T
/* Define if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define one (just one) of the following for the endianness of `mp_limb_t'.
If the endianness is not a simple big or little, or you don't know what
it is, then leave both of these undefined. */
#undef HAVE_LIMB_BIG_ENDIAN
#define HAVE_LIMB_LITTLE_ENDIAN 1
/* Presumably: define if the C++ type `std::locale' is available (name follows
   the autoconf type-check convention) -- TODO confirm against configure. */
#define HAVE_STD__LOCALE 1
/* Define if you have the `localeconv' function. */
#define HAVE_LOCALECONV 1
/* Define if you have the <locale.h> header file. */
#define HAVE_LOCALE_H 1
/* Define if the system has the type `long double'. */
#define HAVE_LONG_DOUBLE 1
/* Define if the system has the type `long long'.
   NOTE(review): this is defined to 0 rather than left undefined, so it is
   false under `#if HAVE_LONG_LONG' but still counts as set under
   `#ifdef HAVE_LONG_LONG' / `defined(HAVE_LONG_LONG)'. Confirm that every
   consumer tests the value and not mere definedness. */
#define HAVE_LONG_LONG 0
/* Define if you have the `lrand48' function. */
#undef HAVE_LRAND48
/* Define if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define if you have the `memset' function. */
#define HAVE_MEMSET 1
/* Define if you have the `mmap' function. */
#undef HAVE_MMAP
/* Define if you have the `mprotect' function. */
#undef HAVE_MPROTECT
/* Define if you have the `obstack_vprintf' function. */
#undef HAVE_OBSTACK_VPRINTF
/* Define if you have the `popen' function. */
#undef HAVE_POPEN
/* Define if you have the `processor_info' function. */
#undef HAVE_PROCESSOR_INFO
/* Define if the system has the type `ptrdiff_t'. */
#define HAVE_PTRDIFF_T 1
/* Define if the system has the type `quad_t'. */
#undef HAVE_QUAD_T
/* Presumably: define if you have the `raise' function -- TODO confirm. */
#define HAVE_RAISE 1
/* Define if you have the `read_real_time' function. */
#undef HAVE_READ_REAL_TIME
/* Define if you have the `sigaction' function. */
#undef HAVE_SIGACTION
/* Define if you have the `sigaltstack' function. */
#undef HAVE_SIGALTSTACK
/* Define if you have the `sigstack' function. */
#undef HAVE_SIGSTACK
/* Tune directory speed_cyclecounter (undef=none, 1=32bits, 2=64bits) */
#define HAVE_SPEED_CYCLECOUNTER 2
/* Define if the system has the type `stack_t'. */
#undef HAVE_STACK_T
/* Define if <stdarg.h> exists and works */
#define HAVE_STDARG 1
/* Define if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define if you have the `strcasecmp' function. */
#undef HAVE_STRCASECMP
/* Define if you have the `strchr' function. */
#define HAVE_STRCHR 1
/* Define if cpp supports the ANSI # stringizing operator. */
#define HAVE_STRINGIZE 1
/* Define if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define if you have the `strnlen' function. */
#define HAVE_STRNLEN 1
/* Define if you have the `strtoul' function. */
#define HAVE_STRTOUL 1
/* Define if you have the `sysconf' function. */
#undef HAVE_SYSCONF
/* Define if you have the `sysctl' function. */
#undef HAVE_SYSCTL
/* Define if you have the `sysctlbyname' function. */
#undef HAVE_SYSCTLBYNAME
/* Define if you have the `syssgi' function. */
#undef HAVE_SYSSGI
/* Define if you have the <sys/mman.h> header file. */
#undef HAVE_SYS_MMAN_H
/* Define if you have the <sys/param.h> header file. */
#undef HAVE_SYS_PARAM_H
/* Define if you have the <sys/processor.h> header file. */
#undef HAVE_SYS_PROCESSOR_H
/* Define if you have the <sys/resource.h> header file. */
#undef HAVE_SYS_RESOURCE_H
/* Define if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define if you have the <sys/sysctl.h> header file. */
#undef HAVE_SYS_SYSCTL_H
/* Define if you have the <sys/syssgi.h> header file. */
#undef HAVE_SYS_SYSSGI_H
/* Define if you have the <sys/systemcfg.h> header file. */
#undef HAVE_SYS_SYSTEMCFG_H
/* Define if you have the <sys/times.h> header file. */
#undef HAVE_SYS_TIMES_H
/* Define if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
/* Define if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define if you have the `times' function. */
#undef HAVE_TIMES
/* Define if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define if you have vsnprintf and it works properly. */
#undef HAVE_VSNPRINTF
/* Assembler local label prefix */
#undef LSYM_PREFIX
/* Define if you have the `fesetround' function via the <fenv.h> header file.
*/
#undef MPFR_HAVE_FESETROUND
/* Presumably: define if you have the C++ <sstream> header file -- TODO
   confirm. */
#define HAVE_SSTREAM 1
/* Name of package */
#define PACKAGE "gmp"
/* Define if compiler has function prototypes */
#define PROTOTYPES 1
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE void
/* The size of a `unsigned long', as computed by sizeof. */
#define SIZEOF_UNSIGNED_LONG 4
/* Define if sscanf requires writable inputs */
#undef SSCANF_WRITABLE_INPUT
/* Define if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
/* Maximum size the tune program can test for KARATSUBA_SQR_THRESHOLD */
#define TUNE_KARATSUBA_SQR_MAX 67
/* Version number of package */
#define VERSION "4.2.1 (VC8)"
/* ./configure --enable-assert option, to enable some ASSERT()s */
#undef WANT_ASSERT
/* ./configure --enable-fft option, to enable FFTs for multiplication */
#define WANT_FFT 1
/* Define to 1 if --enable-profiling=gprof */
#undef WANT_PROFILING_GPROF
/* Define to 1 if --enable-profiling=prof */
#undef WANT_PROFILING_PROF
/* --enable-alloca=yes */
#undef WANT_TMP_ALLOCA
/* --enable-alloca=debug */
#undef WANT_TMP_DEBUG
/* --enable-alloca=malloc-notreentrant */
#undef WANT_TMP_NOTREENTRANT
/* --enable-alloca=malloc-reentrant */
#define WANT_TMP_REENTRANT 1
/* Define if your processor stores words with the most significant byte first
(like Motorola and SPARC, unlike Intel and VAX). */
#undef WORDS_BIGENDIAN
/* Define if `lex' declares `yytext' as a `char *' by default, not a `char[]'.
*/
#undef YYTEXT_POINTER
/* Define as `__inline' if that's what the C compiler calls it, or to nothing
if it is not supported. */
#define inline __inline
/* Define to empty if the keyword `volatile' does not work. Warning: valid
code using `volatile' can become incorrect without. Disable with care. */
#undef volatile
/* Microsoft compiler only: mark strcasecmp/strncasecmp as available and map
   strcasecmp, strncasecmp and alloca onto the underscore-prefixed MSVC CRT
   names. Nothing is defined for any other compiler. */
#if defined(_MSC_VER)
#  define HAVE_STRCASECMP  1
#  define HAVE_STRNCASECMP 1
#  define strcasecmp       _stricmp
#  define strncasecmp      _strnicmp
#  define alloca           _alloca
#endif

View file

@ -1,543 +0,0 @@
/* Templates for defines setup by configure.
Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
/* Define one (and only one) of these for the CPU host.
Only hosts that are going to be tested for need to be in this list,
not everything that can possibly be selected.
*/
#undef HAVE_HOST_CPU_alpha
#undef HAVE_HOST_CPU_alphaev5
#undef HAVE_HOST_CPU_alphaev6
#undef HAVE_HOST_CPU_alphaev67
#undef HAVE_HOST_CPU_m68k
#undef HAVE_HOST_CPU_m68000
#undef HAVE_HOST_CPU_m68010
#undef HAVE_HOST_CPU_m68020
#undef HAVE_HOST_CPU_m68030
#undef HAVE_HOST_CPU_m68040
#undef HAVE_HOST_CPU_m68060
#undef HAVE_HOST_CPU_m68302
#undef HAVE_HOST_CPU_m68360
#undef HAVE_HOST_CPU_powerpc604
#undef HAVE_HOST_CPU_powerpc604e
#undef HAVE_HOST_CPU_powerpc750
#undef HAVE_HOST_CPU_powerpc7400
#undef HAVE_HOST_CPU_sparc
#undef HAVE_HOST_CPU_sparcv8
#undef HAVE_HOST_CPU_supersparc
#undef HAVE_HOST_CPU_sparclite
#undef HAVE_HOST_CPU_microsparc
#undef HAVE_HOST_CPU_ultrasparc1
#undef HAVE_HOST_CPU_ultrasparc2
#undef HAVE_HOST_CPU_sparc64
#undef HAVE_HOST_CPU_hppa1_0
#undef HAVE_HOST_CPU_hppa1_1
#undef HAVE_HOST_CPU_hppa2_0n
#undef HAVE_HOST_CPU_hppa2_0w
#undef HAVE_HOST_CPU_i386
#undef HAVE_HOST_CPU_i486
#undef HAVE_HOST_CPU_i586
#undef HAVE_HOST_CPU_i686
#define HAVE_HOST_CPU_pentium 1
#undef HAVE_HOST_CPU_pentiummmx
#undef HAVE_HOST_CPU_pentiumpro
#undef HAVE_HOST_CPU_pentium2
#undef HAVE_HOST_CPU_pentium3
#undef HAVE_HOST_CPU_k5
#undef HAVE_HOST_CPU_k6
#undef HAVE_HOST_CPU_k62
#undef HAVE_HOST_CPU_k63
#undef HAVE_HOST_CPU_athlon
/* a dummy to make autoheader happy */
#undef HAVE_HOST_CPU_
/* Define one (and only one) of these for the CPU host family.
Only hosts that are going to be tested for need to be in this list,
not everything that can possibly be selected.
*/
#undef HAVE_HOST_CPU_FAMILY_power
#undef HAVE_HOST_CPU_FAMILY_powerpc
#define HAVE_HOST_CPU_FAMILY_x86 1
/* Define if we have native implementation of function.
(use just one of the three following defines)
*/
#undef HAVE_NATIVE_mpn_add
#undef HAVE_NATIVE_mpn_add_1
#undef HAVE_NATIVE_mpn_addmul_2
#undef HAVE_NATIVE_mpn_addmul_3
#undef HAVE_NATIVE_mpn_addmul_4
#undef HAVE_NATIVE_mpn_addsub_n
#undef HAVE_NATIVE_mpn_addsub_nc
#undef HAVE_NATIVE_mpn_and_n
#undef HAVE_NATIVE_mpn_andn_n
#undef HAVE_NATIVE_mpn_bdivmod
#undef HAVE_NATIVE_mpn_cmp
#undef HAVE_NATIVE_mpn_com_n
#undef HAVE_NATIVE_mpn_divrem
#undef HAVE_NATIVE_mpn_divrem_2
#undef HAVE_NATIVE_mpn_divrem_newton
#undef HAVE_NATIVE_mpn_divrem_classic
#undef HAVE_NATIVE_mpn_dump
#undef HAVE_NATIVE_mpn_gcd
#undef HAVE_NATIVE_mpn_gcd_1
#undef HAVE_NATIVE_mpn_gcd_finda
#undef HAVE_NATIVE_mpn_gcdext
#undef HAVE_NATIVE_mpn_get_str
#undef HAVE_NATIVE_mpn_invert_limb
#undef HAVE_NATIVE_mpn_ior_n
#undef HAVE_NATIVE_mpn_iorn_n
#undef HAVE_NATIVE_mpn_mul
#undef HAVE_NATIVE_mpn_mul_2
#undef HAVE_NATIVE_mpn_mul_3
#undef HAVE_NATIVE_mpn_mul_4
#undef HAVE_NATIVE_mpn_mul_n
#undef HAVE_NATIVE_mpn_nand_n
#undef HAVE_NATIVE_mpn_nior_n
#undef HAVE_NATIVE_mpn_perfect_square_p
#undef HAVE_NATIVE_mpn_preinv_mod_1
#undef HAVE_NATIVE_mpn_random2
#undef HAVE_NATIVE_mpn_random
#undef HAVE_NATIVE_mpn_rawrandom
#undef HAVE_NATIVE_mpn_scan0
#undef HAVE_NATIVE_mpn_scan1
#undef HAVE_NATIVE_mpn_set_str
#undef HAVE_NATIVE_mpn_sqrtrem
#undef HAVE_NATIVE_mpn_sqr_diagonal
#undef HAVE_NATIVE_mpn_sub
#undef HAVE_NATIVE_mpn_sub_1
#undef HAVE_NATIVE_mpn_udiv_w_sdiv
#undef HAVE_NATIVE_mpn_xor_n
#undef HAVE_NATIVE_mpn_xnor_n
#undef HAVE_NATIVE_mpn_add_n
#undef HAVE_NATIVE_mpn_add_nc
#undef HAVE_NATIVE_mpn_sub_n
#undef HAVE_NATIVE_mpn_sub_nc
#undef HAVE_NATIVE_mpn_addmul_1
#undef HAVE_NATIVE_mpn_addmul_1c
#undef HAVE_NATIVE_mpn_submul_1
#undef HAVE_NATIVE_mpn_submul_1c
#undef HAVE_NATIVE_mpn_copyd
#undef HAVE_NATIVE_mpn_copyi
#undef HAVE_NATIVE_mpn_divexact_1
#undef HAVE_NATIVE_mpn_divexact_by3c
#undef HAVE_NATIVE_mpn_divrem_1
#undef HAVE_NATIVE_mpn_divrem_1c
#undef HAVE_NATIVE_mpn_hamdist
#undef HAVE_NATIVE_mpn_popcount
#undef HAVE_NATIVE_mpn_lshift
#undef HAVE_NATIVE_mpn_rshift
#undef HAVE_NATIVE_mpn_mod_1
#undef HAVE_NATIVE_mpn_mod_1c
#undef HAVE_NATIVE_mpn_modexact_1_odd
#undef HAVE_NATIVE_mpn_modexact_1c_odd
#undef HAVE_NATIVE_mpn_mul_1
#undef HAVE_NATIVE_mpn_mul_1c
#undef HAVE_NATIVE_mpn_mul_basecase
#undef HAVE_NATIVE_mpn_sqr_basecase
#undef HAVE_NATIVE_mpn_umul_ppmm
#undef HAVE_NATIVE_mpn_udiv_qrnnd
/* For the Intel Pentium assembler code */
#define HAVE_NATIVE_mpn_add_n 1
#define HAVE_NATIVE_mpn_add_nc 1
#define HAVE_NATIVE_mpn_sub_n 1
#define HAVE_NATIVE_mpn_sub_nc 1
#define HAVE_NATIVE_mpn_addmul_1 1
#define HAVE_NATIVE_mpn_submul_1 1
#define HAVE_NATIVE_mpn_copyd 1
#define HAVE_NATIVE_mpn_copyi 1
#define HAVE_NATIVE_mpn_divexact_1 1
#define HAVE_NATIVE_mpn_divexact_by3c 1
#define HAVE_NATIVE_mpn_divrem_1 1
#define HAVE_NATIVE_mpn_divrem_1c 1
#define HAVE_NATIVE_mpn_lshift 1
#define HAVE_NATIVE_mpn_rshift 1
#define HAVE_NATIVE_mpn_mod_1 1
#define HAVE_NATIVE_mpn_mod_1c 1
#define HAVE_NATIVE_mpn_mul_1 1
#define HAVE_NATIVE_mpn_mul_basecase 1
#define HAVE_NATIVE_mpn_umul_ppmm 1
#define HAVE_NATIVE_mpn_udiv_qrnnd 1
/* a dummy to make autoheader happy */
#undef HAVE_NATIVE_
/* The gmp-mparam.h to update when tuning. */
#undef GMP_MPARAM_H_SUGGEST
/* Define if you have the `alarm' function. */
#undef HAVE_ALARM
/* Define if alloca() works (via gmp-impl.h). */
#define HAVE_ALLOCA 1
/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
#undef HAVE_ALLOCA_H
/* Define if the compiler accepts gcc style __attribute__ ((const)) */
#undef HAVE_ATTRIBUTE_CONST
/* Define if the compiler accepts gcc style __attribute__ ((malloc)) */
#undef HAVE_ATTRIBUTE_MALLOC
/* Define if the compiler accepts gcc style __attribute__ ((mode (XX))) */
#undef HAVE_ATTRIBUTE_MODE
/* Define if the compiler accepts gcc style __attribute__ ((noreturn)) */
#undef HAVE_ATTRIBUTE_NORETURN
/* Define if tests/libtests has calling conventions checking for the CPU */
#undef HAVE_CALLING_CONVENTIONS
/* Define if you have the `clock' function. */
#define HAVE_CLOCK 1
/* Define if you have the `clock_gettime' function. */
#undef HAVE_CLOCK_GETTIME
/* Define if you have the `cputime' function. */
#undef HAVE_CPUTIME
/* Define to 1 if you have the declaration of `fgetc', and to 0 if you don't.
*/
#define HAVE_DECL_FGETC 1
/* Define to 1 if you have the declaration of `fscanf', and to 0 if you don't.
*/
#define HAVE_DECL_FSCANF 1
/* Define to 1 if you have the declaration of `optarg', and to 0 if you don't.
*/
#define HAVE_DECL_OPTARG 0
/* Define to 1 if you have the declaration of `ungetc', and to 0 if you don't.
*/
#define HAVE_DECL_UNGETC 1
/* Define to 1 if you have the declaration of `vfprintf', and to 0 if you
don't. */
#define HAVE_DECL_VFPRINTF 1
/* Define if denormalized floats work. */
#define HAVE_DENORMS 1
/* Define if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define one (and only one) of the following for the format of a `double'.
If your format is not among these choices, or you don't know what it is,
then leave all of them undefined.
"IEEE_LITTLE_SWAPPED" means little endian, but with the two 4-byte halves
swapped, as used by ARM CPUs in little endian mode. */
#undef HAVE_DOUBLE_IEEE_BIG_ENDIAN
#define HAVE_DOUBLE_IEEE_LITTLE_ENDIAN 1
#undef HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
#undef HAVE_DOUBLE_VAX_D
#undef HAVE_DOUBLE_VAX_G
#undef HAVE_DOUBLE_CRAY_CFP
/* Define if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define if you have the <fpu_control.h> header file. */
#undef HAVE_FPU_CONTROL_H
/* Define if you have the `getpagesize' function. */
#undef HAVE_GETPAGESIZE
/* Define if you have the `getrusage' function. */
#undef HAVE_GETRUSAGE
/* Define if you have the `gettimeofday' function. */
#undef HAVE_GETTIMEOFDAY
/* Define if 0/0, 1/0, -1/0 and sqrt(-1) work to generate NaN/infinities. */
#define HAVE_INFS 1
/* Define if the system has the type `intmax_t'. */
#undef HAVE_INTMAX_T
/* Define if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define one (just one) of the following for the endianness of `mp_limb_t'.
If the endianness is not a simple big or little, or you don't know what
it is, then leave both of these undefined. */
#undef HAVE_LIMB_BIG_ENDIAN
#define HAVE_LIMB_LITTLE_ENDIAN 1
/* Presumably: define if the C++ type `std::locale' is available (name follows
   the autoconf type-check convention) -- TODO confirm against configure. */
#define HAVE_STD__LOCALE 1
/* Define if you have the `localeconv' function. */
#define HAVE_LOCALECONV 1
/* Define if you have the <locale.h> header file. */
#define HAVE_LOCALE_H 1
/* Define if the system has the type `long double'. */
#define HAVE_LONG_DOUBLE 1
/* Define if the system has the type `long long'. */
#define HAVE_LONG_LONG 1
/* Define if you have the `lrand48' function. */
#undef HAVE_LRAND48
/* Define if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define if you have the `memset' function. */
#define HAVE_MEMSET 1
/* Define if you have the `mmap' function. */
#undef HAVE_MMAP
/* Define if you have the `mprotect' function. */
#undef HAVE_MPROTECT
/* Define if you have the `obstack_vprintf' function. */
#undef HAVE_OBSTACK_VPRINTF
/* Define if you have the `popen' function. */
#undef HAVE_POPEN
/* Define if you have the `processor_info' function. */
#undef HAVE_PROCESSOR_INFO
/* Define if the system has the type `ptrdiff_t'. */
#define HAVE_PTRDIFF_T 1
/* Define if the system has the type `quad_t'. */
#undef HAVE_QUAD_T
/* Presumably: define if you have the `raise' function -- TODO confirm. */
#define HAVE_RAISE 1
/* Define if you have the `read_real_time' function. */
#undef HAVE_READ_REAL_TIME
/* Define if you have the `sigaction' function. */
#undef HAVE_SIGACTION
/* Define if you have the `sigaltstack' function. */
#undef HAVE_SIGALTSTACK
/* Define if you have the `sigstack' function. */
#undef HAVE_SIGSTACK
/* Tune directory speed_cyclecounter (undef=none, 1=32bits, 2=64bits) */
#define HAVE_SPEED_CYCLECOUNTER 2
/* Define if the system has the type `stack_t'. */
#undef HAVE_STACK_T
/* Define if <stdarg.h> exists and works */
#define HAVE_STDARG 1
/* Define if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define if you have the `strcasecmp' function. */
#undef HAVE_STRCASECMP
/* Define if you have the `strchr' function. */
#define HAVE_STRCHR 1
/* Define if cpp supports the ANSI # stringizing operator. */
#define HAVE_STRINGIZE 1
/* Define if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define if you have the `strnlen' function. */
#define HAVE_STRNLEN 1
/* Define if you have the `strtoul' function. */
#define HAVE_STRTOUL 1
/* Define if you have the `sysconf' function. */
#undef HAVE_SYSCONF
/* Define if you have the `sysctl' function. */
#undef HAVE_SYSCTL
/* Define if you have the `sysctlbyname' function. */
#undef HAVE_SYSCTLBYNAME
/* Define if you have the `syssgi' function. */
#undef HAVE_SYSSGI
/* Define if you have the <sys/mman.h> header file. */
#undef HAVE_SYS_MMAN_H
/* Define if you have the <sys/param.h> header file. */
#undef HAVE_SYS_PARAM_H
/* Define if you have the <sys/processor.h> header file. */
#undef HAVE_SYS_PROCESSOR_H
/* Define if you have the <sys/resource.h> header file. */
#undef HAVE_SYS_RESOURCE_H
/* Define if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define if you have the <sys/sysctl.h> header file. */
#undef HAVE_SYS_SYSCTL_H
/* Define if you have the <sys/syssgi.h> header file. */
#undef HAVE_SYS_SYSSGI_H
/* Define if you have the <sys/systemcfg.h> header file. */
#undef HAVE_SYS_SYSTEMCFG_H
/* Define if you have the <sys/times.h> header file. */
#undef HAVE_SYS_TIMES_H
/* Define if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
/* Define if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define if you have the `times' function. */
#undef HAVE_TIMES
/* Define if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define if you have vsnprintf and it works properly. */
#undef HAVE_VSNPRINTF
/* Assembler local label prefix */
#undef LSYM_PREFIX
/* Define if you have the `fesetround' function via the <fenv.h> header file.
*/
#undef MPFR_HAVE_FESETROUND
/* Presumably: define if you have the C++ <sstream> header file (the name
   follows the HAVE_<header> pattern used in this file) -- TODO confirm. */
#define HAVE_SSTREAM 1
/* Name of package */
#define PACKAGE "gmp"
/* Define if compiler has function prototypes */
#define PROTOTYPES 1
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE void
/* The size of a `unsigned long', as computed by sizeof. */
#define SIZEOF_UNSIGNED_LONG 4
/* Define if sscanf requires writable inputs */
#undef SSCANF_WRITABLE_INPUT
/* Define if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
/* Maximum size the tune program can test for KARATSUBA_SQR_THRESHOLD */
#define TUNE_KARATSUBA_SQR_MAX 67
/* Version number of package */
#define VERSION "4.2.1 (VC8)"
/* ./configure --enable-assert option, to enable some ASSERT()s */
#undef WANT_ASSERT
/* ./configure --enable-fft option, to enable FFTs for multiplication */
#define WANT_FFT 1
/* Define to 1 if --enable-profiling=gprof */
#undef WANT_PROFILING_GPROF
/* Define to 1 if --enable-profiling=prof */
#undef WANT_PROFILING_PROF
/* --enable-alloca=yes */
#undef WANT_TMP_ALLOCA
/* --enable-alloca=debug */
#undef WANT_TMP_DEBUG
/* --enable-alloca=malloc-notreentrant */
#undef WANT_TMP_NOTREENTRANT
/* --enable-alloca=malloc-reentrant */
#define WANT_TMP_REENTRANT 1
/* Define if your processor stores words with the most significant byte first
(like Motorola and SPARC, unlike Intel and VAX). */
#undef WORDS_BIGENDIAN
/* Define if `lex' declares `yytext' as a `char *' by default, not a `char[]'.
*/
#undef YYTEXT_POINTER
/* Define as `__inline' if that's what the C compiler calls it, or to nothing
if it is not supported. */
#define inline __inline
/* Define to empty if the keyword `volatile' does not work. Warning: valid
code using `volatile' can become incorrect without. Disable with care. */
#undef volatile
/* When building with Microsoft Visual C++ (_MSC_VER defined), alias
   strcasecmp, strncasecmp and alloca to the underscore-prefixed names
   _stricmp, _strnicmp and _alloca, and flag both case-insensitive string
   comparisons as available. */
#if defined(_MSC_VER)
#  define HAVE_STRCASECMP 1
#  define HAVE_STRNCASECMP 1
#  define alloca _alloca
#  define strncasecmp _strnicmp
#  define strcasecmp _stricmp
#endif /* _MSC_VER */

View file

@ -1,549 +0,0 @@
/* Templates for defines setup by configure.
Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
/* Define one (and only one) of these for the CPU host.
Only hosts that are going to be tested for need to be in this list,
not everything that can possibly be selected.
*/
#undef HAVE_HOST_CPU_alpha
#undef HAVE_HOST_CPU_alphaev5
#undef HAVE_HOST_CPU_alphaev6
#undef HAVE_HOST_CPU_alphaev67
#undef HAVE_HOST_CPU_m68k
#undef HAVE_HOST_CPU_m68000
#undef HAVE_HOST_CPU_m68010
#undef HAVE_HOST_CPU_m68020
#undef HAVE_HOST_CPU_m68030
#undef HAVE_HOST_CPU_m68040
#undef HAVE_HOST_CPU_m68060
#undef HAVE_HOST_CPU_m68302
#undef HAVE_HOST_CPU_m68360
#undef HAVE_HOST_CPU_powerpc604
#undef HAVE_HOST_CPU_powerpc604e
#undef HAVE_HOST_CPU_powerpc750
#undef HAVE_HOST_CPU_powerpc7400
#undef HAVE_HOST_CPU_sparc
#undef HAVE_HOST_CPU_sparcv8
#undef HAVE_HOST_CPU_supersparc
#undef HAVE_HOST_CPU_sparclite
#undef HAVE_HOST_CPU_microsparc
#undef HAVE_HOST_CPU_ultrasparc1
#undef HAVE_HOST_CPU_ultrasparc2
#undef HAVE_HOST_CPU_sparc64
#undef HAVE_HOST_CPU_hppa1_0
#undef HAVE_HOST_CPU_hppa1_1
#undef HAVE_HOST_CPU_hppa2_0n
#undef HAVE_HOST_CPU_hppa2_0w
#undef HAVE_HOST_CPU_i386
#undef HAVE_HOST_CPU_i486
#undef HAVE_HOST_CPU_i586
#undef HAVE_HOST_CPU_i686
#undef HAVE_HOST_CPU_pentium
#undef HAVE_HOST_CPU_pentiummmx
#undef HAVE_HOST_CPU_pentiumpro
#undef HAVE_HOST_CPU_pentium2
#define HAVE_HOST_CPU_pentium3 1
#undef HAVE_HOST_CPU_k5
#undef HAVE_HOST_CPU_k6
#undef HAVE_HOST_CPU_k62
#undef HAVE_HOST_CPU_k63
#undef HAVE_HOST_CPU_athlon
/* a dummy to make autoheader happy */
#undef HAVE_HOST_CPU_
/* Define one (and only one) of these for the CPU host family.
Only hosts that are going to be tested for need to be in this list,
not everything that can possibly be selected.
*/
#undef HAVE_HOST_CPU_FAMILY_power
#undef HAVE_HOST_CPU_FAMILY_powerpc
#define HAVE_HOST_CPU_FAMILY_x86 1
/* Define if we have a native implementation of the function.
Every symbol is first left undefined here; the ones that are provided by the
assembler code are defined again further below.
*/
#undef HAVE_NATIVE_mpn_add
#undef HAVE_NATIVE_mpn_add_1
#undef HAVE_NATIVE_mpn_addmul_2
#undef HAVE_NATIVE_mpn_addmul_3
#undef HAVE_NATIVE_mpn_addmul_4
#undef HAVE_NATIVE_mpn_addsub_n
#undef HAVE_NATIVE_mpn_addsub_nc
#undef HAVE_NATIVE_mpn_and_n
#undef HAVE_NATIVE_mpn_andn_n
#undef HAVE_NATIVE_mpn_bdivmod
#undef HAVE_NATIVE_mpn_cmp
#undef HAVE_NATIVE_mpn_com_n
#undef HAVE_NATIVE_mpn_divrem
#undef HAVE_NATIVE_mpn_divrem_2
#undef HAVE_NATIVE_mpn_divrem_newton
#undef HAVE_NATIVE_mpn_divrem_classic
#undef HAVE_NATIVE_mpn_dump
#undef HAVE_NATIVE_mpn_gcd
#undef HAVE_NATIVE_mpn_gcd_1
#undef HAVE_NATIVE_mpn_gcd_finda
#undef HAVE_NATIVE_mpn_gcdext
#undef HAVE_NATIVE_mpn_get_str
#undef HAVE_NATIVE_mpn_invert_limb
#undef HAVE_NATIVE_mpn_ior_n
#undef HAVE_NATIVE_mpn_iorn_n
#undef HAVE_NATIVE_mpn_mul
#undef HAVE_NATIVE_mpn_mul_2
#undef HAVE_NATIVE_mpn_mul_3
#undef HAVE_NATIVE_mpn_mul_4
#undef HAVE_NATIVE_mpn_mul_n
#undef HAVE_NATIVE_mpn_nand_n
#undef HAVE_NATIVE_mpn_nior_n
#undef HAVE_NATIVE_mpn_perfect_square_p
#undef HAVE_NATIVE_mpn_preinv_mod_1
#undef HAVE_NATIVE_mpn_random2
#undef HAVE_NATIVE_mpn_random
#undef HAVE_NATIVE_mpn_rawrandom
#undef HAVE_NATIVE_mpn_scan0
#undef HAVE_NATIVE_mpn_scan1
#undef HAVE_NATIVE_mpn_set_str
#undef HAVE_NATIVE_mpn_sqrtrem
#undef HAVE_NATIVE_mpn_sqr_diagonal
#undef HAVE_NATIVE_mpn_sub
#undef HAVE_NATIVE_mpn_sub_1
#undef HAVE_NATIVE_mpn_udiv_w_sdiv
#undef HAVE_NATIVE_mpn_xor_n
#undef HAVE_NATIVE_mpn_xnor_n
#undef HAVE_NATIVE_mpn_add_n
#undef HAVE_NATIVE_mpn_add_nc
#undef HAVE_NATIVE_mpn_sub_n
#undef HAVE_NATIVE_mpn_sub_nc
#undef HAVE_NATIVE_mpn_addmul_1
#undef HAVE_NATIVE_mpn_addmul_1c
#undef HAVE_NATIVE_mpn_submul_1
#undef HAVE_NATIVE_mpn_submul_1c
#undef HAVE_NATIVE_mpn_copyd
#undef HAVE_NATIVE_mpn_copyi
#undef HAVE_NATIVE_mpn_divexact_1
#undef HAVE_NATIVE_mpn_divexact_by3c
#undef HAVE_NATIVE_mpn_divrem_1
#undef HAVE_NATIVE_mpn_divrem_1c
#undef HAVE_NATIVE_mpn_hamdist
#undef HAVE_NATIVE_mpn_popcount
#undef HAVE_NATIVE_mpn_lshift
#undef HAVE_NATIVE_mpn_rshift
#undef HAVE_NATIVE_mpn_mod_1
#undef HAVE_NATIVE_mpn_mod_1c
#undef HAVE_NATIVE_mpn_modexact_1_odd
#undef HAVE_NATIVE_mpn_modexact_1c_odd
#undef HAVE_NATIVE_mpn_mul_1
#undef HAVE_NATIVE_mpn_mul_1c
#undef HAVE_NATIVE_mpn_mul_basecase
#undef HAVE_NATIVE_mpn_sqr_basecase
#undef HAVE_NATIVE_mpn_umul_ppmm
#undef HAVE_NATIVE_mpn_udiv_qrnnd
/* For the Intel P3 assembler code: define again the subset of the
   HAVE_NATIVE_mpn_* symbols that were #undef'd just above, so that these
   routines are flagged as having a native implementation. */
#define HAVE_NATIVE_mpn_add_n 1
#define HAVE_NATIVE_mpn_add_nc 1
#define HAVE_NATIVE_mpn_sub_n 1
#define HAVE_NATIVE_mpn_sub_nc 1
#define HAVE_NATIVE_mpn_addmul_1 1
#define HAVE_NATIVE_mpn_submul_1 1
#define HAVE_NATIVE_mpn_copyd 1
#define HAVE_NATIVE_mpn_copyi 1
#define HAVE_NATIVE_mpn_divexact_1 1
#define HAVE_NATIVE_mpn_divexact_by3c 1
#define HAVE_NATIVE_mpn_divrem_1 1
#define HAVE_NATIVE_mpn_divrem_1c 1
#define HAVE_NATIVE_mpn_hamdist 1
#define HAVE_NATIVE_mpn_popcount 1
#define HAVE_NATIVE_mpn_lshift 1
#define HAVE_NATIVE_mpn_rshift 1
#define HAVE_NATIVE_mpn_mod_1 1
#define HAVE_NATIVE_mpn_mod_1c 1
#define HAVE_NATIVE_mpn_modexact_1_odd 1
#define HAVE_NATIVE_mpn_modexact_1c_odd 1
#define HAVE_NATIVE_mpn_mul_1 1
#define HAVE_NATIVE_mpn_mul_basecase 1
#define HAVE_NATIVE_mpn_sqr_basecase 1
#define HAVE_NATIVE_mpn_umul_ppmm 1
#define HAVE_NATIVE_mpn_udiv_qrnnd 1
/* a dummy to make autoheader happy */
#undef HAVE_NATIVE_
/* The gmp-mparam.h to update when tuning. */
#undef GMP_MPARAM_H_SUGGEST
/* Define if you have the `alarm' function. */
#undef HAVE_ALARM
/* Define if alloca() works (via gmp-impl.h). */
#define HAVE_ALLOCA 1
/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
#undef HAVE_ALLOCA_H
/* Define if the compiler accepts gcc style __attribute__ ((const)) */
#undef HAVE_ATTRIBUTE_CONST
/* Define if the compiler accepts gcc style __attribute__ ((malloc)) */
#undef HAVE_ATTRIBUTE_MALLOC
/* Define if the compiler accepts gcc style __attribute__ ((mode (XX))) */
#undef HAVE_ATTRIBUTE_MODE
/* Define if the compiler accepts gcc style __attribute__ ((noreturn)) */
#undef HAVE_ATTRIBUTE_NORETURN
/* Define if tests/libtests has calling conventions checking for the CPU */
#undef HAVE_CALLING_CONVENTIONS
/* Define if you have the `clock' function. */
#define HAVE_CLOCK 1
/* Define if you have the `clock_gettime' function. */
#undef HAVE_CLOCK_GETTIME
/* Define if you have the `cputime' function. */
#undef HAVE_CPUTIME
/* Define to 1 if you have the declaration of `fgetc', and to 0 if you don't.
*/
#define HAVE_DECL_FGETC 1
/* Define to 1 if you have the declaration of `fscanf', and to 0 if you don't.
*/
#define HAVE_DECL_FSCANF 1
/* Define to 1 if you have the declaration of `optarg', and to 0 if you don't.
*/
#define HAVE_DECL_OPTARG 0
/* Define to 1 if you have the declaration of `ungetc', and to 0 if you don't.
*/
#define HAVE_DECL_UNGETC 1
/* Define to 1 if you have the declaration of `vfprintf', and to 0 if you
don't. */
#define HAVE_DECL_VFPRINTF 1
/* Define if denormalized floats work. */
#define HAVE_DENORMS 1
/* Define if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define one (and only one) of the following for the format of a `double'.
If your format is not among these choices, or you don't know what it is,
then leave all of them undefined.
"IEEE_LITTLE_SWAPPED" means little endian, but with the two 4-byte halves
swapped, as used by ARM CPUs in little endian mode. */
#undef HAVE_DOUBLE_IEEE_BIG_ENDIAN
#define HAVE_DOUBLE_IEEE_LITTLE_ENDIAN 1
#undef HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
#undef HAVE_DOUBLE_VAX_D
#undef HAVE_DOUBLE_VAX_G
#undef HAVE_DOUBLE_CRAY_CFP
/* Define if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define if you have the <fpu_control.h> header file. */
#undef HAVE_FPU_CONTROL_H
/* Define if you have the `getpagesize' function. */
#undef HAVE_GETPAGESIZE
/* Define if you have the `getrusage' function. */
#undef HAVE_GETRUSAGE
/* Define if you have the `gettimeofday' function. */
#undef HAVE_GETTIMEOFDAY
/* Define if 0/0, 1/0, -1/0 and sqrt(-1) work to generate NaN/infinities. */
#define HAVE_INFS 1
/* Define if the system has the type `intmax_t'. */
#undef HAVE_INTMAX_T
/* Define if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define one (just one) of the following for the endianness of `mp_limb_t'.
If the endianness is not a simple big or little, or you don't know what
it is, then leave both of these undefined. */
#undef HAVE_LIMB_BIG_ENDIAN
#define HAVE_LIMB_LITTLE_ENDIAN 1
/* Presumably: define if the C++ library provides the type `std::locale'
   (inferred from the macro name) -- TODO confirm. */
#define HAVE_STD__LOCALE 1
/* Define if you have the `localeconv' function. */
#define HAVE_LOCALECONV 1
/* Define if you have the <locale.h> header file. */
#define HAVE_LOCALE_H 1
/* Define if the system has the type `long double'. */
#define HAVE_LONG_DOUBLE 1
/* Define if the system has the type `long long'. */
#define HAVE_LONG_LONG 1
/* Define if you have the `lrand48' function. */
#undef HAVE_LRAND48
/* Define if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define if you have the `memset' function. */
#define HAVE_MEMSET 1
/* Define if you have the `mmap' function. */
#undef HAVE_MMAP
/* Define if you have the `mprotect' function. */
#undef HAVE_MPROTECT
/* Define if you have the `obstack_vprintf' function. */
#undef HAVE_OBSTACK_VPRINTF
/* Define if you have the `popen' function. */
#undef HAVE_POPEN
/* Define if you have the `processor_info' function. */
#undef HAVE_PROCESSOR_INFO
/* Define if the system has the type `ptrdiff_t'. */
#define HAVE_PTRDIFF_T 1
/* Define if the system has the type `quad_t'. */
#undef HAVE_QUAD_T
/* Presumably: define if you have the `raise' function (the name follows the
   HAVE_<function> pattern of the neighbouring entries) -- TODO confirm. */
#define HAVE_RAISE 1
/* Define if you have the `read_real_time' function. */
#undef HAVE_READ_REAL_TIME
/* Define if you have the `sigaction' function. */
#undef HAVE_SIGACTION
/* Define if you have the `sigaltstack' function. */
#undef HAVE_SIGALTSTACK
/* Define if you have the `sigstack' function. */
#undef HAVE_SIGSTACK
/* Tune directory speed_cyclecounter (undef=none, 1=32bits, 2=64bits) */
#define HAVE_SPEED_CYCLECOUNTER 2
/* Define if the system has the type `stack_t'. */
#undef HAVE_STACK_T
/* Define if <stdarg.h> exists and works */
#define HAVE_STDARG 1
/* Define if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define if you have the `strcasecmp' function. */
#undef HAVE_STRCASECMP
/* Define if you have the `strchr' function. */
#define HAVE_STRCHR 1
/* Define if cpp supports the ANSI # stringizing operator. */
#define HAVE_STRINGIZE 1
/* Define if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define if you have the `strnlen' function. */
#define HAVE_STRNLEN 1
/* Define if you have the `strtoul' function. */
#define HAVE_STRTOUL 1
/* Define if you have the `sysconf' function. */
#undef HAVE_SYSCONF
/* Define if you have the `sysctl' function. */
#undef HAVE_SYSCTL
/* Define if you have the `sysctlbyname' function. */
#undef HAVE_SYSCTLBYNAME
/* Define if you have the `syssgi' function. */
#undef HAVE_SYSSGI
/* Define if you have the <sys/mman.h> header file. */
#undef HAVE_SYS_MMAN_H
/* Define if you have the <sys/param.h> header file. */
#undef HAVE_SYS_PARAM_H
/* Define if you have the <sys/processor.h> header file. */
#undef HAVE_SYS_PROCESSOR_H
/* Define if you have the <sys/resource.h> header file. */
#undef HAVE_SYS_RESOURCE_H
/* Define if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define if you have the <sys/sysctl.h> header file. */
#undef HAVE_SYS_SYSCTL_H
/* Define if you have the <sys/syssgi.h> header file. */
#undef HAVE_SYS_SYSSGI_H
/* Define if you have the <sys/systemcfg.h> header file. */
#undef HAVE_SYS_SYSTEMCFG_H
/* Define if you have the <sys/times.h> header file. */
#undef HAVE_SYS_TIMES_H
/* Define if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
/* Define if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define if you have the `times' function. */
#undef HAVE_TIMES
/* Define if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define if you have vsnprintf and it works properly. */
#undef HAVE_VSNPRINTF
/* Assembler local label prefix */
#undef LSYM_PREFIX
/* Define if you have the `fesetround' function via the <fenv.h> header file.
*/
#undef MPFR_HAVE_FESETROUND
/* Presumably: define if you have the C++ <sstream> header file (the name
   follows the HAVE_<header> pattern used in this file) -- TODO confirm. */
#define HAVE_SSTREAM 1
/* Name of package */
#define PACKAGE "gmp"
/* Define if compiler has function prototypes */
#define PROTOTYPES 1
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE void
/* The size of a `unsigned long', as computed by sizeof. */
#define SIZEOF_UNSIGNED_LONG 4
/* Define if sscanf requires writable inputs */
#undef SSCANF_WRITABLE_INPUT
/* Define if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
/* Maximum size the tune program can test for KARATSUBA_SQR_THRESHOLD */
#define TUNE_KARATSUBA_SQR_MAX 67
/* Version number of package */
#define VERSION "4.2.1 (VC8)"
/* ./configure --enable-assert option, to enable some ASSERT()s */
#undef WANT_ASSERT
/* ./configure --enable-fft option, to enable FFTs for multiplication */
#define WANT_FFT 1
/* Define to 1 if --enable-profiling=gprof */
#undef WANT_PROFILING_GPROF
/* Define to 1 if --enable-profiling=prof */
#undef WANT_PROFILING_PROF
/* --enable-alloca=yes */
#undef WANT_TMP_ALLOCA
/* --enable-alloca=debug */
#undef WANT_TMP_DEBUG
/* --enable-alloca=malloc-notreentrant */
#undef WANT_TMP_NOTREENTRANT
/* --enable-alloca=malloc-reentrant */
#define WANT_TMP_REENTRANT 1
/* Define if your processor stores words with the most significant byte first
(like Motorola and SPARC, unlike Intel and VAX). */
#undef WORDS_BIGENDIAN
/* Define if `lex' declares `yytext' as a `char *' by default, not a `char[]'.
*/
#undef YYTEXT_POINTER
/* Define as `__inline' if that's what the C compiler calls it, or to nothing
if it is not supported. */
#define inline __inline
/* Define to empty if the keyword `volatile' does not work. Warning: valid
code using `volatile' can become incorrect without. Disable with care. */
#undef volatile
/* When building with Microsoft Visual C++ (_MSC_VER defined), alias
   strcasecmp, strncasecmp and alloca to the underscore-prefixed names
   _stricmp, _strnicmp and _alloca, and flag both case-insensitive string
   comparisons as available. */
#if defined(_MSC_VER)
#  define HAVE_STRCASECMP 1
#  define HAVE_STRNCASECMP 1
#  define alloca _alloca
#  define strncasecmp _strnicmp
#  define strcasecmp _stricmp
#endif /* _MSC_VER */

View file

@ -1,553 +0,0 @@
/* Templates for defines setup by configure.
Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
/* Define one (and only one) of these for the CPU host.
Only hosts that are going to be tested for need to be in this list,
not everything that can possibly be selected.
*/
#undef HAVE_HOST_CPU_alpha
#undef HAVE_HOST_CPU_alphaev5
#undef HAVE_HOST_CPU_alphaev6
#undef HAVE_HOST_CPU_alphaev67
#undef HAVE_HOST_CPU_m68k
#undef HAVE_HOST_CPU_m68000
#undef HAVE_HOST_CPU_m68010
#undef HAVE_HOST_CPU_m68020
#undef HAVE_HOST_CPU_m68030
#undef HAVE_HOST_CPU_m68040
#undef HAVE_HOST_CPU_m68060
#undef HAVE_HOST_CPU_m68302
#undef HAVE_HOST_CPU_m68360
#undef HAVE_HOST_CPU_powerpc604
#undef HAVE_HOST_CPU_powerpc604e
#undef HAVE_HOST_CPU_powerpc750
#undef HAVE_HOST_CPU_powerpc7400
#undef HAVE_HOST_CPU_sparc
#undef HAVE_HOST_CPU_sparcv8
#undef HAVE_HOST_CPU_supersparc
#undef HAVE_HOST_CPU_sparclite
#undef HAVE_HOST_CPU_microsparc
#undef HAVE_HOST_CPU_ultrasparc1
#undef HAVE_HOST_CPU_ultrasparc2
#undef HAVE_HOST_CPU_sparc64
#undef HAVE_HOST_CPU_hppa1_0
#undef HAVE_HOST_CPU_hppa1_1
#undef HAVE_HOST_CPU_hppa2_0n
#undef HAVE_HOST_CPU_hppa2_0w
#undef HAVE_HOST_CPU_i386
#undef HAVE_HOST_CPU_i486
#undef HAVE_HOST_CPU_i586
#undef HAVE_HOST_CPU_i686
#undef HAVE_HOST_CPU_pentium
#undef HAVE_HOST_CPU_pentiummmx
#undef HAVE_HOST_CPU_pentiumpro
#undef HAVE_HOST_CPU_pentium2
#undef HAVE_HOST_CPU_pentium3
#define HAVE_HOST_CPU_pentium4 1
#undef HAVE_HOST_CPU_k5
#undef HAVE_HOST_CPU_k6
#undef HAVE_HOST_CPU_k62
#undef HAVE_HOST_CPU_k63
#undef HAVE_HOST_CPU_athlon
/* a dummy to make autoheader happy */
#undef HAVE_HOST_CPU_
/* Define one (and only one) of these for the CPU host family.
Only hosts that are going to be tested for need to be in this list,
not everything that can possibly be selected.
*/
#undef HAVE_HOST_CPU_FAMILY_power
#undef HAVE_HOST_CPU_FAMILY_powerpc
#define HAVE_HOST_CPU_FAMILY_x86 1
/* Define if we have a native implementation of the function.
Every symbol is first left undefined here; the ones that are provided by the
assembler code are defined again further below.
*/
#undef HAVE_NATIVE_mpn_add
#undef HAVE_NATIVE_mpn_add_1
#undef HAVE_NATIVE_mpn_addmul_2
#undef HAVE_NATIVE_mpn_addmul_3
#undef HAVE_NATIVE_mpn_addmul_4
#undef HAVE_NATIVE_mpn_addsub_n
#undef HAVE_NATIVE_mpn_addsub_nc
#undef HAVE_NATIVE_mpn_and_n
#undef HAVE_NATIVE_mpn_andn_n
#undef HAVE_NATIVE_mpn_bdivmod
#undef HAVE_NATIVE_mpn_cmp
#undef HAVE_NATIVE_mpn_com_n
#undef HAVE_NATIVE_mpn_divrem
#undef HAVE_NATIVE_mpn_divrem_2
#undef HAVE_NATIVE_mpn_divrem_newton
#undef HAVE_NATIVE_mpn_divrem_classic
#undef HAVE_NATIVE_mpn_dump
#undef HAVE_NATIVE_mpn_gcd
#undef HAVE_NATIVE_mpn_gcd_1
#undef HAVE_NATIVE_mpn_gcd_finda
#undef HAVE_NATIVE_mpn_gcdext
#undef HAVE_NATIVE_mpn_get_str
#undef HAVE_NATIVE_mpn_invert_limb
#undef HAVE_NATIVE_mpn_ior_n
#undef HAVE_NATIVE_mpn_iorn_n
#undef HAVE_NATIVE_mpn_mul
#undef HAVE_NATIVE_mpn_mul_2
#undef HAVE_NATIVE_mpn_mul_3
#undef HAVE_NATIVE_mpn_mul_4
#undef HAVE_NATIVE_mpn_mul_n
#undef HAVE_NATIVE_mpn_nand_n
#undef HAVE_NATIVE_mpn_nior_n
#undef HAVE_NATIVE_mpn_perfect_square_p
#undef HAVE_NATIVE_mpn_preinv_mod_1
#undef HAVE_NATIVE_mpn_random2
#undef HAVE_NATIVE_mpn_random
#undef HAVE_NATIVE_mpn_rawrandom
#undef HAVE_NATIVE_mpn_scan0
#undef HAVE_NATIVE_mpn_scan1
#undef HAVE_NATIVE_mpn_set_str
#undef HAVE_NATIVE_mpn_sqrtrem
#undef HAVE_NATIVE_mpn_sqr_diagonal
#undef HAVE_NATIVE_mpn_sub
#undef HAVE_NATIVE_mpn_sub_1
#undef HAVE_NATIVE_mpn_udiv_w_sdiv
#undef HAVE_NATIVE_mpn_xor_n
#undef HAVE_NATIVE_mpn_xnor_n
#undef HAVE_NATIVE_mpn_add_n
#undef HAVE_NATIVE_mpn_add_nc
#undef HAVE_NATIVE_mpn_sub_n
#undef HAVE_NATIVE_mpn_sub_nc
#undef HAVE_NATIVE_mpn_addmul_1
#undef HAVE_NATIVE_mpn_addmul_1c
#undef HAVE_NATIVE_mpn_submul_1
#undef HAVE_NATIVE_mpn_submul_1c
#undef HAVE_NATIVE_mpn_copyd
#undef HAVE_NATIVE_mpn_copyi
#undef HAVE_NATIVE_mpn_divexact_1
#undef HAVE_NATIVE_mpn_divexact_by3c
#undef HAVE_NATIVE_mpn_divrem_1
#undef HAVE_NATIVE_mpn_divrem_1c
#undef HAVE_NATIVE_mpn_hamdist
#undef HAVE_NATIVE_mpn_popcount
#undef HAVE_NATIVE_mpn_lshift
#undef HAVE_NATIVE_mpn_rshift
#undef HAVE_NATIVE_mpn_mod_1
#undef HAVE_NATIVE_mpn_mod_1c
#undef HAVE_NATIVE_mpn_modexact_1_odd
#undef HAVE_NATIVE_mpn_modexact_1c_odd
#undef HAVE_NATIVE_mpn_mul_1
#undef HAVE_NATIVE_mpn_mul_1c
#undef HAVE_NATIVE_mpn_mul_basecase
#undef HAVE_NATIVE_mpn_sqr_basecase
#undef HAVE_NATIVE_mpn_umul_ppmm
#undef HAVE_NATIVE_mpn_udiv_qrnnd
/* For the Intel P4 assembler code: define again the subset of the
   HAVE_NATIVE_mpn_* symbols that were #undef'd just above, so that these
   routines are flagged as having a native implementation. */
#define HAVE_NATIVE_mpn_add_n 1
#define HAVE_NATIVE_mpn_add_nc 1
#define HAVE_NATIVE_mpn_sub_n 1
#define HAVE_NATIVE_mpn_sub_nc 1
#define HAVE_NATIVE_mpn_addmul_1 1
#define HAVE_NATIVE_mpn_addmul_1c 1
#define HAVE_NATIVE_mpn_submul_1 1
#define HAVE_NATIVE_mpn_submul_1c 1
#define HAVE_NATIVE_mpn_copyd 1
#define HAVE_NATIVE_mpn_copyi 1
#define HAVE_NATIVE_mpn_divexact_1 1
#define HAVE_NATIVE_mpn_divexact_by3c 1
#define HAVE_NATIVE_mpn_divrem_1 1
#define HAVE_NATIVE_mpn_divrem_1c 1
#define HAVE_NATIVE_mpn_hamdist 1
#define HAVE_NATIVE_mpn_popcount 1
#define HAVE_NATIVE_mpn_lshift 1
#define HAVE_NATIVE_mpn_rshift 1
#define HAVE_NATIVE_mpn_mod_1 1
#define HAVE_NATIVE_mpn_mod_1c 1
#define HAVE_NATIVE_mpn_modexact_1_odd 1
#define HAVE_NATIVE_mpn_modexact_1c_odd 1
#define HAVE_NATIVE_mpn_mul_1 1
#define HAVE_NATIVE_mpn_mul_1c 1
#define HAVE_NATIVE_mpn_mul_basecase 1
#define HAVE_NATIVE_mpn_sqr_basecase 1
#define HAVE_NATIVE_mpn_umul_ppmm 1
#define HAVE_NATIVE_mpn_udiv_qrnnd 1
/* a dummy to make autoheader happy */
#undef HAVE_NATIVE_
/* The gmp-mparam.h to update when tuning. */
#undef GMP_MPARAM_H_SUGGEST
/* Define if you have the `alarm' function. */
#undef HAVE_ALARM
/* Define if alloca() works (via gmp-impl.h). */
#define HAVE_ALLOCA 1
/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
#undef HAVE_ALLOCA_H
/* Define if the compiler accepts gcc style __attribute__ ((const)) */
#undef HAVE_ATTRIBUTE_CONST
/* Define if the compiler accepts gcc style __attribute__ ((malloc)) */
#undef HAVE_ATTRIBUTE_MALLOC
/* Define if the compiler accepts gcc style __attribute__ ((mode (XX))) */
#undef HAVE_ATTRIBUTE_MODE
/* Define if the compiler accepts gcc style __attribute__ ((noreturn)) */
#undef HAVE_ATTRIBUTE_NORETURN
/* Define if tests/libtests has calling conventions checking for the CPU */
#undef HAVE_CALLING_CONVENTIONS
/* Define if you have the `clock' function. */
#define HAVE_CLOCK 1
/* Define if you have the `clock_gettime' function. */
#undef HAVE_CLOCK_GETTIME
/* Define if you have the `cputime' function. */
#undef HAVE_CPUTIME
/* Define to 1 if you have the declaration of `fgetc', and to 0 if you don't.
*/
#define HAVE_DECL_FGETC 1
/* Define to 1 if you have the declaration of `fscanf', and to 0 if you don't.
*/
#define HAVE_DECL_FSCANF 1
/* Define to 1 if you have the declaration of `optarg', and to 0 if you don't.
*/
#define HAVE_DECL_OPTARG 0
/* Define to 1 if you have the declaration of `ungetc', and to 0 if you don't.
*/
#define HAVE_DECL_UNGETC 1
/* Define to 1 if you have the declaration of `vfprintf', and to 0 if you
don't. */
#define HAVE_DECL_VFPRINTF 1
/* Define if denormalized floats work. */
#define HAVE_DENORMS 1
/* Define if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define one (and only one) of the following for the format of a `double'.
If your format is not among these choices, or you don't know what it is,
then leave all of them undefined.
"IEEE_LITTLE_SWAPPED" means little endian, but with the two 4-byte halves
swapped, as used by ARM CPUs in little endian mode. */
#undef HAVE_DOUBLE_IEEE_BIG_ENDIAN
#define HAVE_DOUBLE_IEEE_LITTLE_ENDIAN 1
#undef HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
#undef HAVE_DOUBLE_VAX_D
#undef HAVE_DOUBLE_VAX_G
#undef HAVE_DOUBLE_CRAY_CFP
/* Define if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define if you have the <fpu_control.h> header file. */
#undef HAVE_FPU_CONTROL_H
/* Define if you have the `getpagesize' function. */
#undef HAVE_GETPAGESIZE
/* Define if you have the `getrusage' function. */
#undef HAVE_GETRUSAGE
/* Define if you have the `gettimeofday' function. */
#undef HAVE_GETTIMEOFDAY
/* Define if 0/0, 1/0, -1/0 and sqrt(-1) work to generate NaN/infinities. */
#define HAVE_INFS 1
/* Define if the system has the type `intmax_t'. */
#undef HAVE_INTMAX_T
/* Define if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define one (just one) of the following for the endianness of `mp_limb_t'.
If the endianness is not a simple big or little, or you don't know what
it is, then leave both of these undefined. */
#undef HAVE_LIMB_BIG_ENDIAN
#define HAVE_LIMB_LITTLE_ENDIAN 1
/* Presumably: define if the C++ library provides the type `std::locale'
   (inferred from the macro name) -- TODO confirm. */
#define HAVE_STD__LOCALE 1
/* Define if you have the `localeconv' function. */
#define HAVE_LOCALECONV 1
/* Define if you have the <locale.h> header file. */
#define HAVE_LOCALE_H 1
/* Define if the system has the type `long double'. */
#define HAVE_LONG_DOUBLE 1
/* Define if the system has the type `long long'. */
#define HAVE_LONG_LONG 1
/* Define if you have the `lrand48' function. */
#undef HAVE_LRAND48
/* Define if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define if you have the `memset' function. */
#define HAVE_MEMSET 1
/* Define if you have the `mmap' function. */
#undef HAVE_MMAP
/* Define if you have the `mprotect' function. */
#undef HAVE_MPROTECT
/* Define if you have the `obstack_vprintf' function. */
#undef HAVE_OBSTACK_VPRINTF
/* Define if you have the `popen' function. */
#undef HAVE_POPEN
/* Define if you have the `processor_info' function. */
#undef HAVE_PROCESSOR_INFO
/* Define if the system has the type `ptrdiff_t'. */
#define HAVE_PTRDIFF_T 1
/* Define if the system has the type `quad_t'. */
#undef HAVE_QUAD_T
/* Presumably: define if you have the `raise' function (the name follows the
   HAVE_<function> pattern of the neighbouring entries) -- TODO confirm. */
#define HAVE_RAISE 1
/* Define if you have the `read_real_time' function. */
#undef HAVE_READ_REAL_TIME
/* Define if you have the `sigaction' function. */
#undef HAVE_SIGACTION
/* Define if you have the `sigaltstack' function. */
#undef HAVE_SIGALTSTACK
/* Define if you have the `sigstack' function. */
#undef HAVE_SIGSTACK
/* Tune directory speed_cyclecounter (undef=none, 1=32bits, 2=64bits) */
#define HAVE_SPEED_CYCLECOUNTER 2
/* Define if the system has the type `stack_t'. */
#undef HAVE_STACK_T
/* Define if <stdarg.h> exists and works */
#define HAVE_STDARG 1
/* Define if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define if you have the `strcasecmp' function. */
#undef HAVE_STRCASECMP
/* Define if you have the `strchr' function. */
#define HAVE_STRCHR 1
/* Define if cpp supports the ANSI # stringizing operator. */
#define HAVE_STRINGIZE 1
/* Define if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define if you have the `strnlen' function. */
#define HAVE_STRNLEN 1
/* Define if you have the `strtoul' function. */
#define HAVE_STRTOUL 1
/* Define if you have the `sysconf' function. */
#undef HAVE_SYSCONF
/* Define if you have the `sysctl' function. */
#undef HAVE_SYSCTL
/* Define if you have the `sysctlbyname' function. */
#undef HAVE_SYSCTLBYNAME
/* Define if you have the `syssgi' function. */
#undef HAVE_SYSSGI
/* Define if you have the <sys/mman.h> header file. */
#undef HAVE_SYS_MMAN_H
/* Define if you have the <sys/param.h> header file. */
#undef HAVE_SYS_PARAM_H
/* Define if you have the <sys/processor.h> header file. */
#undef HAVE_SYS_PROCESSOR_H
/* Define if you have the <sys/resource.h> header file. */
#undef HAVE_SYS_RESOURCE_H
/* Define if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define if you have the <sys/sysctl.h> header file. */
#undef HAVE_SYS_SYSCTL_H
/* Define if you have the <sys/syssgi.h> header file. */
#undef HAVE_SYS_SYSSGI_H
/* Define if you have the <sys/systemcfg.h> header file. */
#undef HAVE_SYS_SYSTEMCFG_H
/* Define if you have the <sys/times.h> header file. */
#undef HAVE_SYS_TIMES_H
/* Define if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
/* Define if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define if you have the `times' function. */
#undef HAVE_TIMES
/* Define if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define if you have vsnprintf and it works properly. */
#undef HAVE_VSNPRINTF
/* Assembler local label prefix */
#undef LSYM_PREFIX
/* Define if you have the `fesetround' function via the <fenv.h> header file.
*/
#undef MPFR_HAVE_FESETROUND
#define HAVE_SSTREAM 1
/* Name of package */
#define PACKAGE "gmp"
/* Define if compiler has function prototypes */
#define PROTOTYPES 1
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE void
/* The size of a `unsigned long', as computed by sizeof. */
#define SIZEOF_UNSIGNED_LONG 4
/* Define if sscanf requires writable inputs */
#undef SSCANF_WRITABLE_INPUT
/* Define if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
/* Maximum size the tune program can test for KARATSUBA_SQR_THRESHOLD */
#define TUNE_KARATSUBA_SQR_MAX 67
/* Version number of package */
#define VERSION "4.2.1 (VC8)"
/* ./configure --enable-assert option, to enable some ASSERT()s */
#undef WANT_ASSERT
/* ./configure --enable-fft option, to enable FFTs for multiplication */
#define WANT_FFT 1
/* Define to 1 if --enable-profiling=gprof */
#undef WANT_PROFILING_GPROF
/* Define to 1 if --enable-profiling=prof */
#undef WANT_PROFILING_PROF
/* --enable-alloca=yes */
#undef WANT_TMP_ALLOCA
/* --enable-alloca=debug */
#undef WANT_TMP_DEBUG
/* --enable-alloca=malloc-notreentrant */
#undef WANT_TMP_NOTREENTRANT
/* --enable-alloca=malloc-reentrant */
#define WANT_TMP_REENTRANT 1
/* Define if your processor stores words with the most significant byte first
(like Motorola and SPARC, unlike Intel and VAX). */
#undef WORDS_BIGENDIAN
/* Define if `lex' declares `yytext' as a `char *' by default, not a `char[]'.
*/
#undef YYTEXT_POINTER
/* Define as `__inline' if that's what the C compiler calls it, or to nothing
if it is not supported. */
#define inline __inline
/* Define to empty if the keyword `volatile' does not work. Warning: valid
code using `volatile' can become incorrect without. Disable with care. */
#undef volatile
/* Under the Microsoft compiler, alias strcasecmp, strncasecmp and alloca to
   the underscore-prefixed names, then mark the two case-insensitive
   comparisons as available.  HAVE_STRCASECMP is #undef'd earlier in this
   header, so this block is what switches it on for that compiler. */
#ifdef _MSC_VER
#define strcasecmp _stricmp
#define strncasecmp _strnicmp
#define alloca _alloca
#define HAVE_STRCASECMP 1
#define HAVE_STRNCASECMP 1
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,4 +0,0 @@
rem Copy the first argument onto the second argument, skipping the copy when
rem the destination already exists and fc reports success comparing the two.
rem A missing source file prints a failure message and jumps to the end.
rem NOTE(review): the errorlevel test below is written with a single leading
rem percent sign. Presumably cmd drops the unmatched sign so the test reads
rem as the classic errorlevel form. Confirm before relying on it.
if not exist %1 ( echo file_copy failure: %1 not found && goto exit)
if exist %2 ( fc %1 %2 > nul && if not %errorlevel 1 goto exit )
echo copying %1 to %2 && copy %1 %2
:exit

View file

@ -1,183 +0,0 @@
/* Generate mp_bases data.
Copyright 1991, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,
Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <math.h>
#include "dumbmp.c"
/* Outputs of the most recent generate() call; header() and table() print
   them. */
int chars_per_limb;
double chars_per_bit_exactly;
mpz_t big_base;
int normalization_steps;
mpz_t big_base_inverted;
/* Scratch value used inside generate(); initialised once in main(). */
mpz_t t;
/* Non-zero when n has at most one bit set.  Note that this is also true
   for n == 0. */
#define POW2_P(n) (((n) & ((n) - 1)) == 0)
/* Return the number of significant bits in x: 0 when x is 0, otherwise one
   more than the index of the highest set bit.  Callers wanting log2 of a
   power of two subtract 1 from the result. */
unsigned int
ulog2 (unsigned int x)
{
  unsigned int nbits = 0;

  while (x != 0)
    {
      x >>= 1;
      nbits++;
    }
  return nbits;
}
/* Compute the per-base conversion data for BASE and store it in the
   file-scope results:
     chars_per_limb         count of times BASE can be multiplied into 1
                            without the product exceeding 2^numb_bits
     chars_per_bit_exactly  ln(2) / ln(BASE)
     big_base               BASE ^ chars_per_limb
     normalization_steps    limb_bits minus the bit size of big_base
     big_base_inverted      floor (2^(2*limb_bits - normalization_steps)
                            / big_base) - 2^limb_bits
   The file-scope t is used as scratch and is overwritten. */
void
generate (int limb_bits, int nail_bits, int base)
{
  int numb_bits;

  numb_bits = limb_bits - nail_bits;

  /* t = 2^numb_bits, the bound for the power search below */
  mpz_set_ui (t, 1L);
  mpz_mul_2exp (t, t, numb_bits);

  /* count how many multiplications by base stay at or below t */
  chars_per_limb = 0;
  mpz_set_ui (big_base, 1L);
  mpz_mul_ui (big_base, big_base, (long) base);
  while (mpz_cmp (big_base, t) <= 0)
    {
      chars_per_limb++;
      mpz_mul_ui (big_base, big_base, (long) base);
    }

  chars_per_bit_exactly = 0.69314718055994530942 / log ((double) base);

  /* big_base overshot in the loop above, so rebuild it exactly */
  mpz_ui_pow_ui (big_base, (long) base, (long) chars_per_limb);

  normalization_steps = limb_bits - mpz_sizeinbase (big_base, 2);

  /* big_base_inverted = 2^(2*limb_bits - normalization_steps) / big_base */
  mpz_set_ui (t, 1L);
  mpz_mul_2exp (t, t, 2*limb_bits - normalization_steps);
  mpz_tdiv_q (big_base_inverted, t, big_base);

  /* ... less 2^limb_bits */
  mpz_set_ui (t, 1L);
  mpz_mul_2exp (t, t, limb_bits);
  mpz_sub (big_base_inverted, big_base_inverted, t);
}
/* Write the "header" flavour of the output to stdout: a GMP_NUMB_BITS
   guard followed by #defines carrying the base 10 values produced by
   generate(). */
void
header (int limb_bits, int nail_bits)
{
  int data_bits;

  data_bits = limb_bits - nail_bits;
  generate (limb_bits, nail_bits, 10);

  printf ("/* This file generated by gen-bases.c - DO NOT EDIT. */\n"
          "\n");
  printf ("#if GMP_NUMB_BITS != %d\n"
          "#error, error, this data is for %d bits\n"
          "#endif\n"
          "\n",
          data_bits, data_bits);

  printf ("/* mp_bases[10] data, as literal values */\n"
          "#define MP_BASES_CHARS_PER_LIMB_10 %d\n"
          "#define MP_BASES_BIG_BASE_10 CNST_LIMB(0x",
          chars_per_limb);
  mpz_out_str (stdout, 16, big_base);

  printf (")\n"
          "#define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x");
  mpz_out_str (stdout, 16, big_base_inverted);

  printf (")\n"
          "#define MP_BASES_NORMALIZATION_STEPS_10 %d\n",
          normalization_steps);
}
/* Write the "table" flavour of the output to stdout: the definition of
   mp_bases[257].  Entries 0 and 1 are fixed text; entries 2..256 come from
   generate().  For a power-of-two base the third field is ulog2(base)-1,
   for any other base the big_base and big_base_inverted values are
   printed. */
void
table (int limb_bits, int nail_bits)
{
  int data_bits, b;

  data_bits = limb_bits - nail_bits;

  printf ("/* This file generated by gen-bases.c - DO NOT EDIT. */\n"
          "\n");
  printf ("#include \"gmp.h\"\n"
          "#include \"gmp-impl.h\"\n"
          "\n");
  printf ("#if GMP_NUMB_BITS != %d\n"
          "#error, error, this data is for %d bits\n"
          "#endif\n"
          "\n",
          data_bits, data_bits);

  puts ("const struct bases mp_bases[257] =\n{");
  puts (" /* 0 */ { 0, 0.0, 0 },");
  puts (" /* 1 */ { 0, 1e37, 0 },");

  b = 2;
  while (b <= 256)
    {
      generate (limb_bits, nail_bits, b);
      printf (" /* %3u */ { ", b);

      if (! POW2_P (b))
        {
          printf ("%u, %.16f, CNST_LIMB(0x",
                  chars_per_limb, chars_per_bit_exactly);
          mpz_out_str (stdout, 16, big_base);
          printf ("), CNST_LIMB(0x");
          mpz_out_str (stdout, 16, big_base_inverted);
          printf (") },\n");
        }
      else
        {
          printf ("%u, %.16f, 0x%x },\n",
                  chars_per_limb, chars_per_bit_exactly, ulog2 (b) - 1);
        }

      b++;
    }

  puts ("};");
}
/* Entry point: gen-bases <header|table> <limbbits> <nailbits>.
   Initialises the shared mpz values, checks the arguments, then dispatches
   to header() or table().  Exits with status 1 on a wrong argument count,
   on limb/nail values that fail the range check, or on an unknown first
   argument. */
int
main (int argc, char **argv)
{
  int limb_bits, nail_bits;
  char *mode;

  mpz_init (big_base);
  mpz_init (big_base_inverted);
  mpz_init (t);

  if (argc != 4)
    {
      fprintf (stderr, "Usage: gen-bases <header|table> <limbbits> <nailbits>\n");
      exit (1);
    }

  mode = argv[1];
  limb_bits = atoi (argv[2]);
  nail_bits = atoi (argv[3]);

  /* need at least one limb bit and 0 <= nail_bits < limb_bits */
  if (! (limb_bits > 0 && nail_bits >= 0 && nail_bits < limb_bits))
    {
      fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
               limb_bits, nail_bits);
      exit (1);
    }

  if (strcmp (mode, "table") == 0)
    table (limb_bits, nail_bits);
  else if (strcmp (mode, "header") == 0)
    header (limb_bits, nail_bits);
  else
    {
      fprintf (stderr, "Invalid header/table choice: %s\n", argv[1]);
      exit (1);
    }

  return 0;
}

View file

@ -1,184 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="gen-bases"
ProjectGUID="{2297FA81-6D9D-4DC3-BA42-04E93F397047}"
RootNamespace="genbases"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
CommandLine=""
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\..\"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="false"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath=".\gen-bases.c"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View file

@ -1,161 +0,0 @@
/* Generate mpz_fac_ui data.
Copyright 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <stdio.h>
#include <stdlib.h>
#include "dumbmp.c"
/* Set x to the product of the z terms y, y+2, y+4, ..., y+2*(z-1).
   x becomes 1 when z is 0.  y itself is left unmodified; a private copy is
   stepped instead. */
void
odd_products (mpz_t x, mpz_t y, int z)
{
  mpz_t term;

  mpz_init_set (term, y);
  mpz_set_ui (x, 1);

  while (z != 0)
    {
      mpz_mul (x, x, term);
      mpz_add_ui (term, term, 2);
      z--;
    }

  mpz_clear (term);
}
/* Print the mpz_fac_ui constants to stdout:
     - guards checking GMP_NUMB_BITS and GMP_LIMB_BITS,
     - ONE_LIMB_FACTORIAL_TABLE, the factorials that fit in numb bits,
     - FAC2OVERE, 2^(limb+1) * 10000 / 27182,
     - FACMUL2 .. FACMUL4, found by the search loop below.

   numb  GMP_NUMB_BITS the data is generated for
   nail  nail bit count; accepted for the caller's convenience, not used
   limb  GMP_LIMB_BITS the data is generated for

   Returns 0 on success (there is no failure return).  All temporaries are
   released before returning. */
int
gen_consts (int numb, int nail, int limb)
{
  mpz_t x, y, t;
  unsigned long a, b, first = 1;

  printf ("/* This file is automatically generated by gen-fac_ui.c */\n\n");
  printf ("#if GMP_NUMB_BITS != %d\n", numb);
  printf ("#error , error this data is for %d GMP_NUMB_BITS only\n", numb);
  printf ("#endif\n");
  printf ("#if GMP_LIMB_BITS != %d\n", limb);
  printf ("#error , error this data is for %d GMP_LIMB_BITS only\n", limb);
  printf ("#endif\n");

  printf
    ("/* This table is 0!,1!,2!,3!,...,n! where n! has <= GMP_NUMB_BITS bits */\n");
  printf
    ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x2),");

  /* 0!, 1! and 2! are in the literal text above; continue from 3!.  Each
     entry's closing "),"  is emitted just before the next entry, and the
     final ")" after the loop. */
  mpz_init_set_ui (x, 2);
  for (b = 3;; b++)
    {
      mpz_mul_ui (x, x, b);	/* x = b! */
      if (mpz_sizeinbase (x, 2) > numb)
        break;
      if (first)
        {
          first = 0;
        }
      else
        {
          printf ("),");
        }
      printf ("CNST_LIMB(0x");
      mpz_out_str (stdout, 16, x);
    }
  printf (")\n");

  mpz_set_ui (x, 1);
  mpz_mul_2exp (x, x, limb + 1);	/* x=2^(limb+1) */
  mpz_init (y);
  mpz_set_ui (y, 10000);
  mpz_mul (x, x, y);		/* x=2^(limb+1)*10^4 */
  mpz_set_ui (y, 27182);	/* exp(1)*10^4 */
  mpz_tdiv_q (x, x, y);		/* x=2^(limb+1)/exp(1) */
  printf ("\n/* is 2^(GMP_LIMB_BITS+1)/exp(1) */\n");
  printf ("#define FAC2OVERE CNST_LIMB(0x");
  mpz_out_str (stdout, 16, x);
  printf (")\n");

  printf
    ("\n/* FACMULn is largest odd x such that x*(x+2)*...*(x+2(n-1))<=2^GMP_NUMB_BITS-1 */\n\n");
  mpz_init (t);
  for (a = 2; a <= 4; a++)
    {
      mpz_set_ui (x, 1);
      mpz_mul_2exp (x, x, numb);
      mpz_root (x, x, a);
      /* so x is approx sol */
      if (mpz_even_p (x))
        mpz_sub_ui (x, x, 1);
      mpz_set_ui (y, 1);
      mpz_mul_2exp (y, y, numb);
      mpz_sub_ui (y, y, 1);	/* y = 2^numb - 1, the bound */
      /* decrement x until we are <= real sol */
      do
        {
          mpz_sub_ui (x, x, 2);
          odd_products (t, x, a);
          if (mpz_cmp (t, y) <= 0)
            break;
        }
      while (1);
      /* increment x until > real sol */
      do
        {
          mpz_add_ui (x, x, 2);
          odd_products (t, x, a);
          if (mpz_cmp (t, y) > 0)
            break;
        }
      while (1);
      /* dec once to get real sol */
      mpz_sub_ui (x, x, 2);
      printf ("#define FACMUL%lu CNST_LIMB(0x", a);
      mpz_out_str (stdout, 16, x);
      printf (")\n");
    }

  /* release the temporaries */
  mpz_clear (t);
  mpz_clear (y);
  mpz_clear (x);

  return 0;
}
/* Entry point: gen-fac_ui limbbits nailbits.
   Parses the two bit counts, rejects the run (exit status 1) when any of
   limb bits, nail bits or their difference is negative, and otherwise
   hands the values to gen_consts(). */
int
main (int argc, char *argv[])
{
  int limb_bits, nail_bits, numb_bits;

  if (argc != 3)
    {
      fprintf (stderr, "Usage: gen-fac_ui limbbits nailbits\n");
      exit (1);
    }

  limb_bits = atoi (argv[1]);
  nail_bits = atoi (argv[2]);
  numb_bits = limb_bits - nail_bits;

  if (! (limb_bits >= 0 && nail_bits >= 0 && numb_bits >= 0))
    {
      fprintf (stderr, "Invalid limb/nail bits %d,%d\n", limb_bits,
               nail_bits);
      exit (1);
    }

  gen_consts (numb_bits, nail_bits, limb_bits);
  return 0;
}

View file

@ -1,183 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="gen-fac_ui"
ProjectGUID="{001E0D42-4AF4-44B8-A8B2-3CD46D537DBE}"
RootNamespace="genfac_ui"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\..\"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="false"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath=".\gen-fac_ui.c"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View file

@ -1,147 +0,0 @@
/* Generate Fibonacci table data.
Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <stdio.h>
#include "dumbmp.c"
/* Table built by generate(): f[0] holds F[-1], f[1] holds F[0], and so on. */
mpz_t *f;
/* fnum is the number of f[] entries table() prints; fib_limit and luc_limit
   are the values header() prints as FIB_TABLE_LIMIT and
   FIB_TABLE_LUCNUM_LIMIT.  All three are set by generate(). */
int fnum, fib_limit, luc_limit;
/* Build the Fibonacci table f[] for limbs of numb_bits data bits and set
   fnum, fib_limit and luc_limit.

   f[] is allocated with 2*numb_bits entries and is left allocated for
   table() to read.  Entries are added until one reaches 2^numb_bits; that
   last entry is computed but not counted in fnum.  luc_limit tracks the
   last index whose value F[i]+2*F[i-1] is still below 2^numb_bits.

   Both temporaries (limit and l) are released before returning. */
void
generate (int numb_bits)
{
  mpz_t limit, l;
  int falloc, i;

  mpz_init_set_ui (limit, 1L);
  mpz_mul_2exp (limit, limit, numb_bits);

  /* fib(2n) > 2^n, so use 2n as a limit for the table size */
  falloc = 2 * numb_bits;
  f = (mpz_t *) xmalloc (falloc * sizeof (*f));

  mpz_init_set_ui (f[0], 1L);  /* F[-1] */
  mpz_init_set_ui (f[1], 0L);  /* F[0] */

  mpz_init (l);
  for (i = 2; ; i++)
    {
      ASSERT (i < falloc);

      /* F[i] = F[i-1] + F[i-2] */
      mpz_init (f[i]);
      mpz_add (f[i], f[i-1], f[i-2]);
      if (mpz_cmp (f[i], limit) >= 0)
        break;

      fnum = i+1;
      fib_limit = i-1;

      /* L[i] = F[i]+2*F[i-1] */
      mpz_add (l, f[i], f[i-1]);
      mpz_add (l, l, f[i-1]);

      if (mpz_cmp (l, limit) < 0)
        luc_limit = i-1;
    }

  mpz_clear (l);
  mpz_clear (limit);
}
/* Write the "header" flavour of the output to stdout: a GMP_NUMB_BITS guard
   and the two table limits computed by generate(). */
void
header (int numb_bits)
{
  printf ("/* This file generated by gen-fib.c - DO NOT EDIT. */\n"
          "\n");
  printf ("#if GMP_NUMB_BITS != %d\n"
          "#error, error, this data is for %d bits\n"
          "#endif\n"
          "\n",
          numb_bits, numb_bits);
  printf ("#define FIB_TABLE_LIMIT %d\n"
          "#define FIB_TABLE_LUCNUM_LIMIT %d\n",
          fib_limit, luc_limit);
}
/* Write the "table" flavour of the output to stdout: the definition of
   __gmp_fib_table with the first fnum entries of f[] in hex, each tagged
   with a comment giving its index minus one. */
void
table (int numb_bits)
{
  int idx;

  printf ("/* This file generated by gen-fib.c - DO NOT EDIT. */\n"
          "\n");
  printf ("#include \"gmp.h\"\n"
          "#include \"gmp-impl.h\"\n"
          "\n");
  printf ("#if GMP_NUMB_BITS != %d\n"
          "#error, error, this data is for %d bits\n"
          "#endif\n"
          "\n",
          numb_bits, numb_bits);
  printf ("const mp_limb_t\n"
          "__gmp_fib_table[FIB_TABLE_LIMIT+2] = {\n");

  idx = 0;
  while (idx < fnum)
    {
      printf (" CNST_LIMB (0x");
      mpz_out_str (stdout, 16, f[idx]);
      printf ("), /* %d */\n", idx-1);
      idx++;
    }

  printf ("};\n");
}
/* Entry point: gen-fib <header|table> <limbbits> <nailbits>.
   Checks the arguments, builds the Fibonacci data with generate(), then
   prints it with header() or table().  Exits with status 1 on a wrong
   argument count, on limb/nail values that fail the range check, or on an
   unknown first argument.

   The usage text names this program (gen-fib); it previously named
   gen-bases. */
int
main (int argc, char *argv[])
{
  int limb_bits, nail_bits, numb_bits;

  if (argc != 4)
    {
      fprintf (stderr, "Usage: gen-fib <header|table> <limbbits> <nailbits>\n");
      exit (1);
    }

  limb_bits = atoi (argv[2]);
  nail_bits = atoi (argv[3]);

  /* need at least one limb bit and 0 <= nail_bits < limb_bits */
  if (limb_bits <= 0
      || nail_bits < 0
      || nail_bits >= limb_bits)
    {
      fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
               limb_bits, nail_bits);
      exit (1);
    }
  numb_bits = limb_bits - nail_bits;

  generate (numb_bits);

  if (strcmp (argv[1], "header") == 0)
    header (numb_bits);
  else if (strcmp (argv[1], "table") == 0)
    table (numb_bits);
  else
    {
      fprintf (stderr, "Invalid header/table choice: %s\n", argv[1]);
      exit (1);
    }

  return 0;
}

View file

@ -1,183 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="gen-fib"
ProjectGUID="{D3C6D6B7-CD38-4D49-9BA7-1FBB35F77223}"
RootNamespace="genfib"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\..\"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="false"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath=".\gen-fib.c"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View file

@ -1,578 +0,0 @@
/* Generate perfect square testing data.
Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <stdio.h>
#include <stdlib.h>
#include "dumbmp.c"
/* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
(plus a PERFSQR_PP modulus), and generate tables indicating quadratic
residues and non-residues modulo small factors of that modulus.
For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That
function exists specifically because 2^24-1 and 2^48-1 have nice sets of
prime factors. For other limb sizes it's considered, but if it doesn't
have good factors then mpn_mod_1 will be used instead.
When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
calculation within a single limb.
In either case primes can be combined to make divisors. The table data
then effectively indicates remainders which are quadratic residues mod
all the primes. This sort of combining reduces the number of steps
needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
Nothing is gained or lost in terms of detections, the same total fraction
of non-residues will be identified.
Nothing particularly sophisticated is attempted for combining factors to
make divisors. This is probably a kind of knapsack problem so it'd be
too hard to attempt anything completely general. For the usual 32 and 64
bit limbs we get a good enough result just pairing the biggest and
smallest which fit together, repeatedly.
Another aim is to get powerful combinations, ie. divisors which identify
biggest fraction of non-residues, and have those run first. Again for
the usual 32 and 64 bits it seems good enough just to pair for big
divisors then sort according to the resulting fraction of non-residues
identified.
Also in this program, a table sq_res_0x100 of residues modulo 256 is
generated. This simply fills bits into limbs of the appropriate
build-time GMP_LIMB_BITS each.
*/
/* Normally we aren't using const in gen*.c programs, so as not to have to
bother figuring out if it works, but using it with f_cmp_divisor and
f_cmp_fraction avoids warnings from the qsort calls. */
/* Same tests as gmp.h. */
/* HAVE_CONST becomes 1 when any of the compiler or platform macros below is
   defined.  When none is, HAVE_CONST stays undefined, evaluates as 0 in the
   second #if, and "const" is defined away to nothing so the qsort
   comparison functions still compile. */
#if defined (__STDC__) \
|| defined (__cplusplus) \
|| defined (_AIX) \
|| defined (__DECC) \
|| (defined (__mips) && defined (_SYSTYPE_SVR4)) \
|| defined (_MSC_VER) \
|| defined (_WIN32)
#define HAVE_CONST 1
#endif
#if ! HAVE_CONST
#define const
#endif
/* Program-wide results.  The sq_res_0x100* values are filled in by
   generate_sq_res_0x100(); mod34_bits through pp_inverted and mod34_excuse
   are filled in by generate_mod().
   NOTE(review): mod_mask is not assigned by either of those functions --
   check where (or whether) it is set. */
mpz_t *sq_res_0x100; /* table of limbs */
int nsq_res_0x100; /* elements in sq_res_0x100 array */
int sq_res_0x100_num; /* squares in sq_res_0x100 */
double sq_res_0x100_fraction; /* sq_res_0x100_num / 256 */
int mod34_bits; /* 3*GMP_NUMB_BITS/4 */
int mod_bits; /* bits from PERFSQR_MOD_34 or MOD_PP */
int max_divisor; /* all divisors <= max_divisor */
int max_divisor_bits; /* ceil(log2(max_divisor)) */
double total_fraction; /* of squares */
mpz_t pp; /* product of primes, or 0 if mod_34lsub1 used */
mpz_t pp_norm; /* pp shifted so NUMB high bit set */
mpz_t pp_inverted; /* invert_limb style inverse */
mpz_t mod_mask; /* 2^mod_bits-1 */
char mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
/* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
struct rawfactor_t {
int divisor; /* the prime itself */
int multiplicity; /* how many times that prime is used */
};
/* Array of factor_alloc entries allocated in generate_mod(); the first
   nrawfactor are in use. */
struct rawfactor_t *rawfactor;
int nrawfactor;
/* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
struct factor_t {
int divisor;
mpz_t inverse; /* 1/divisor mod 2^mod_bits */
mpz_t mask; /* indicating squares mod divisor */
double fraction; /* squares/total */
};
struct factor_t *factor;
int nfactor; /* entries in use in factor array */
int factor_alloc; /* entries allocated to factor array */
/* qsort comparison: order struct factor_t entries by ascending divisor.
   Returns 1, -1 or 0 as the first entry's divisor is greater than, less
   than or equal to the second's. */
int
f_cmp_divisor (const void *parg, const void *qarg)
{
  const struct factor_t *lhs = parg;
  const struct factor_t *rhs = qarg;

  return (lhs->divisor > rhs->divisor) - (lhs->divisor < rhs->divisor);
}
/* qsort comparison: order struct factor_t entries by ascending fraction.
   Returns 1, -1 or 0 as the first entry's fraction is greater than, less
   than or neither relative to the second's. */
int
f_cmp_fraction (const void *parg, const void *qarg)
{
  const struct factor_t *lhs = parg;
  const struct factor_t *rhs = qarg;

  return (lhs->fraction > rhs->fraction) - (lhs->fraction < rhs->fraction);
}
/* Remove array[idx] by copying the remainder down, and adjust narray
   accordingly.

   Every use of a macro argument is parenthesized so that expression
   arguments (for example "i+1" as idx) expand correctly.  idx and narray
   are evaluated more than once, so they must be free of side effects, and
   narray must be an lvalue since it is decremented. */
#define COLLAPSE_ELEMENT(array, idx, narray)                    \
  do {                                                          \
    mem_copyi ((char *) &(array)[(idx)],                        \
               (char *) &(array)[(idx)+1],                      \
               ((narray)-((idx)+1)) * sizeof ((array)[0]));     \
    (narray)--;                                                 \
  } while (0)
/* Return n*2^p mod m, computed by doubling n and reducing mod m once per
   step, p times.  When p is zero or negative no step runs and n is returned
   as given (not reduced). */
int
mul_2exp_mod (int n, int p, int m)
{
  int remaining = p;

  while (remaining > 0)
    {
      n = (n + n) % m;
      remaining--;
    }
  return n;
}
/* Return -n mod m for n already reduced into the range 0 <= n < m (checked
   with ASSERT): 0 stays 0, anything else becomes m-n. */
int
neg_mod (int n, int m)
{
  ASSERT (n >= 0 && n < m);

  if (n != 0)
    return m - n;
  return 0;
}
/* Build in "mask" a bit set such that "mask & (1<<idx)" is non-zero when
   "-(idx<<mod_bits)" can be a square modulo m.

   For every k in 0..m-1 the square k*k mod m is multiplied by
   -(2^mod_bits) mod m, and the bit at the resulting index is set.  Reads
   the file-scope mod_bits. */
void
square_mask (mpz_t mask, int m)
{
  int scale, k;

  /* scale = -(2^mod_bits) mod m */
  scale = neg_mod (mul_2exp_mod (1, mod_bits, m), m);

  mpz_set_ui (mask, 0L);

  k = 0;
  while (k < m)
    {
      int residue = (k * k) % m;

      mpz_setbit (mask, (unsigned long) ((residue * scale) % m));
      k++;
    }
}
/* Build the sq_res_0x100 table: a bit array, split into pieces of limb_bits
   bits, with bit r set when r is the square of some value modulo 0x100.
   Also sets nsq_res_0x100 (number of pieces), sq_res_0x100_num (total bits
   set) and sq_res_0x100_fraction (that count over 256). */
void
generate_sq_res_0x100 (int limb_bits)
{
  int k, count;

  /* enough limb_bits sized pieces to hold 0x100 bits, rounding up */
  nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
  sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));

  k = 0;
  while (k < nsq_res_0x100)
    {
      mpz_init_set_ui (sq_res_0x100[k], 0L);
      k++;
    }

  /* mark every residue that occurs as a square */
  k = 0;
  while (k < 0x100)
    {
      int sq = (k * k) % 0x100;

      mpz_setbit (sq_res_0x100[sq / limb_bits],
                  (unsigned long) (sq % limb_bits));
      k++;
    }

  count = 0;
  for (k = 0; k < nsq_res_0x100; k++)
    count += mpz_popcount (sq_res_0x100[k]);

  sq_res_0x100_num = count;
  sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
}
/* Choose the modulus strategy and build the factor[] array.

   Two paths:
     - "mod 34": used when numb_bits is a multiple of 4 and numb_bits/4 has
       room for max_divisor_bits.  Small primes dividing 2^mod34_bits-1 are
       collected into rawfactor[].
     - "pp": used otherwise, or when the first path finds two or fewer small
       factors.  pp is built as a product of small primes fitting mod_bits;
       pp_norm and pp_inverted are derived from it.  mod34_excuse records
       why the first path was not used.

   Afterwards rawfactor[] is expanded into factor[], entries are combined
   pairwise while the product stays within max_divisor, each entry gets its
   inverse, mask and fraction, total_fraction is accumulated, and factor[]
   is sorted by ascending fraction.

   Relies on the file-scope counters nrawfactor and nfactor being 0 on
   entry: the mod 34 path and the factor-array loop append without
   resetting them.

   Control flow note: "use_pp" is a label inside the body of the
   "nrawfactor <= 2" block and is jumped to from above, so statements in
   that block must stay after the label.

   The repeated-prime loop tracks the running power in "divisor" so the
   "divisor > max_divisor / i" bound can take effect; previously divisor was
   left at 1 and the bound never applied. */
void
generate_mod (int limb_bits, int nail_bits)
{
  int numb_bits = limb_bits - nail_bits;
  int i, divisor;

  mpz_init_set_ui (pp, 0L);
  mpz_init_set_ui (pp_norm, 0L);
  mpz_init_set_ui (pp_inverted, 0L);

  /* no more than limb_bits many factors in a one limb modulus (and of
     course in reality nothing like that many) */
  factor_alloc = limb_bits;
  factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
  rawfactor = (struct rawfactor_t *)
    xmalloc (factor_alloc * sizeof (*rawfactor));

  if (numb_bits % 4 != 0)
    {
      strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
      goto use_pp;
    }

  max_divisor = 2*limb_bits;
  max_divisor_bits = log2_ceil (max_divisor);

  if (numb_bits / 4 < max_divisor_bits)
    {
      /* Wind back to one limb worth of max_divisor, if that will let us use
         mpn_mod_34lsub1. */
      max_divisor = limb_bits;
      max_divisor_bits = log2_ceil (max_divisor);

      if (numb_bits / 4 < max_divisor_bits)
        {
          strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
          goto use_pp;
        }
    }

  {
    /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
    mpz_t m, q, r;
    int multiplicity;

    mod34_bits = (numb_bits / 4) * 3;

    /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
       the mod34_bits mark, adding the two halves for a remainder of at most
       mod34_bits+1 many bits */
    mod_bits = mod34_bits + 1;

    mpz_init_set_ui (m, 1L);
    mpz_mul_2exp (m, m, mod34_bits);
    mpz_sub_ui (m, m, 1L);

    mpz_init (q);
    mpz_init (r);

    for (i = 3; i <= max_divisor; i++)
      {
        if (! isprime (i))
          continue;

        mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
        if (mpz_sgn (r) != 0)
          continue;

        /* if a repeated prime is found it's used as an i^n in one factor */
        divisor = 1;
        multiplicity = 0;
        do
          {
            /* stop before i^(multiplicity+1) would exceed max_divisor */
            if (divisor > max_divisor / i)
              break;
            multiplicity++;
            divisor *= i;	/* divisor = i^multiplicity */
            mpz_set (m, q);
            mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
          }
        while (mpz_sgn (r) == 0);

        ASSERT (nrawfactor < factor_alloc);
        rawfactor[nrawfactor].divisor = i;
        rawfactor[nrawfactor].multiplicity = multiplicity;
        nrawfactor++;
      }

    mpz_clear (m);
    mpz_clear (q);
    mpz_clear (r);
  }

  if (nrawfactor <= 2)
    {
      mpz_t new_pp;

      sprintf (mod34_excuse, "only %d small factor%s",
               nrawfactor, nrawfactor == 1 ? "" : "s");

    use_pp:
      /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
         tried with just one */
      max_divisor = 2*limb_bits;
      max_divisor_bits = log2_ceil (max_divisor);

      mpz_init (new_pp);
      nrawfactor = 0;
      mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);

      /* one copy of each small prime */
      mpz_set_ui (pp, 1L);
      for (i = 3; i <= max_divisor; i++)
        {
          if (! isprime (i))
            continue;

          mpz_mul_ui (new_pp, pp, (unsigned long) i);
          if (mpz_sizeinbase (new_pp, 2) > mod_bits)
            break;
          mpz_set (pp, new_pp);

          ASSERT (nrawfactor < factor_alloc);
          rawfactor[nrawfactor].divisor = i;
          rawfactor[nrawfactor].multiplicity = 1;
          nrawfactor++;
        }

      /* Plus an extra copy of one or more of the primes selected, if that
         still fits in max_divisor and the total in mod_bits. Usually only
         3 or 5 will be candidates */
      for (i = nrawfactor-1; i >= 0; i--)
        {
          if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
            continue;
          mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
          if (mpz_sizeinbase (new_pp, 2) > mod_bits)
            continue;
          mpz_set (pp, new_pp);

          rawfactor[i].multiplicity++;
        }

      mod_bits = mpz_sizeinbase (pp, 2);

      /* pp_norm = pp doubled until it occupies numb_bits bits */
      mpz_set (pp_norm, pp);
      while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
        mpz_add (pp_norm, pp_norm, pp_norm);

      mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);

      mpz_clear (new_pp);
    }

  /* start the factor array */
  for (i = 0; i < nrawfactor; i++)
    {
      int j;
      ASSERT (nfactor < factor_alloc);
      factor[nfactor].divisor = 1;
      for (j = 0; j < rawfactor[i].multiplicity; j++)
        factor[nfactor].divisor *= rawfactor[i].divisor;
      nfactor++;
    }

 combine:
  /* Combine entries in the factor array. Combine the smallest entry with
     the biggest one that will fit with it (ie. under max_divisor), then
     repeat that with the new smallest entry. */
  qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
  for (i = nfactor-1; i >= 1; i--)
    {
      if (factor[i].divisor <= max_divisor / factor[0].divisor)
        {
          factor[0].divisor *= factor[i].divisor;
          COLLAPSE_ELEMENT (factor, i, nfactor);
          goto combine;
        }
    }

  total_fraction = 1.0;
  for (i = 0; i < nfactor; i++)
    {
      mpz_init (factor[i].inverse);
      mpz_invert_ui_2exp (factor[i].inverse,
                          (unsigned long) factor[i].divisor,
                          (unsigned long) mod_bits);

      mpz_init (factor[i].mask);
      square_mask (factor[i].mask, factor[i].divisor);

      /* fraction of possible squares */
      factor[i].fraction = (double) mpz_popcount (factor[i].mask)
        / factor[i].divisor;

      /* total fraction of possible squares */
      total_fraction *= factor[i].fraction;
    }

  /* best tests first (ie. smallest fraction) */
  qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
}
/* Write the generated perfsqr header text to stdout.

   limb_bits and nail_bits are echoed into the #if guard of the output and
   limb_bits also decides whether a factor's mask is printed as one limb or
   split into a high and a low limb.  Everything else that is printed comes
   from file-level state (sq_res_0x100[], pp, pp_norm, pp_inverted,
   rawfactor[], factor[], mod_bits, ...) which this function only reads.  */
void
print (int limb_bits, int nail_bits)
{
  /* A non-zero pp selects the PERFSQR_PP / PERFSQR_MOD_PP flavour of the
     output; a zero pp selects the 2^n-1 / PERFSQR_MOD_34 flavour.  pp is
     not modified below, so it is tested once up front.  */
  const int have_pp = (mpz_sgn (pp) != 0);
  mpz_t mask_hi, mask_lo;
  int idx;

  printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
  printf ("\n");

  /* Guard so the data is rejected when compiled for another limb/nail
     configuration than the one requested here.  */
  printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
          limb_bits, nail_bits);
  printf ("#error, error, this data is for %d bit limb and %d bit nail\n",
          limb_bits, nail_bits);
  printf ("#endif\n");
  printf ("\n");

  /* Table of residues mod 0x100, one CNST_LIMB per table entry.  */
  printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
  printf (" This test identifies %.2f%% as non-squares (%d/256). */\n",
          (1.0 - sq_res_0x100_fraction) * 100.0,
          0x100 - sq_res_0x100_num);
  printf ("static const mp_limb_t\n");
  printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
  idx = 0;
  while (idx < nsq_res_0x100)
    {
      printf (" CNST_LIMB(0x");
      mpz_out_str (stdout, 16, sq_res_0x100[idx]);
      printf ("),\n");
      idx++;
    }
  printf ("};\n");
  printf ("\n");

  /* Comment line showing the modulus as a product of its raw factors.  */
  if (have_pp)
    {
      printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
      printf ("/* PERFSQR_PP = ");
    }
  else
    {
      printf ("/* 2^%d-1 = ", mod34_bits);
    }
  idx = 0;
  while (idx < nrawfactor)
    {
      if (idx != 0)
        printf (" * ");
      printf ("%d", rawfactor[idx].divisor);
      if (rawfactor[idx].multiplicity != 1)
        printf ("^%d", rawfactor[idx].multiplicity);
      idx++;
    }
  printf (" %s*/\n", have_pp ? "" : "... ");

  printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits);
  if (have_pp)
    {
      /* The three PP constants are only emitted for the PP flavour.  */
      printf ("#define PERFSQR_PP CNST_LIMB(0x");
      mpz_out_str (stdout, 16, pp);
      printf (")\n");
      printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x");
      mpz_out_str (stdout, 16, pp_norm);
      printf (")\n");
      printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x");
      mpz_out_str (stdout, 16, pp_inverted);
      printf (")\n");
    }
  printf ("\n");

  /* The PERFSQR_MOD_TEST macro: one reduction followed by one
     PERFSQR_MOD_1 / PERFSQR_MOD_2 line per entry of factor[].  */
  printf ("/* This test identifies %.2f%% as non-squares. */\n",
          (1.0 - total_fraction) * 100.0);
  printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
  printf (" do { \\\n");
  printf (" mp_limb_t r; \\\n");
  if (have_pp)
    printf (" PERFSQR_MOD_PP (r, up, usize); \\\n");
  else
    printf (" PERFSQR_MOD_34 (r, up, usize); \\\n");

  /* Scratch integers for splitting a mask wider than one limb.  */
  mpz_init (mask_hi);
  mpz_init (mask_lo);
  for (idx = 0; idx < nfactor; idx++)
    {
      /* A divisor no larger than limb_bits gets the one-limb form.  */
      const int single_limb = (factor[idx].divisor <= limb_bits);

      printf (" \\\n");
      printf (" /* %5.2f%% */ \\\n",
              (1.0 - factor[idx].fraction) * 100.0);
      printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
              single_limb ? 1 : 2,
              factor[idx].divisor);
      mpz_out_str (stdout, 16, factor[idx].inverse);
      printf ("), \\\n");
      printf (" CNST_LIMB(0x");
      if (! single_limb)
        {
          /* Low limb_bits bits go to mask_lo, the rest to mask_hi; the
             high part is printed first.  */
          mpz_tdiv_r_2exp (mask_lo, factor[idx].mask,
                           (unsigned long) limb_bits);
          mpz_tdiv_q_2exp (mask_hi, factor[idx].mask,
                           (unsigned long) limb_bits);
          mpz_out_str (stdout, 16, mask_hi);
          printf ("), CNST_LIMB(0x");
          mpz_out_str (stdout, 16, mask_lo);
        }
      else
        {
          mpz_out_str (stdout, 16, factor[idx].mask);
        }
      printf (")); \\\n");
    }
  mpz_clear (mask_hi);
  mpz_clear (mask_lo);

  printf (" } while (0)\n");
  printf ("\n");

  /* NOTE(review): 44.0/256.0 is hard-coded here; it presumably equals
     sq_res_0x100_fraction used above -- confirm before replacing.  */
  printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
          (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
  printf ("\n");

  printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
  printf ("#define PERFSQR_DIVISORS { 256,");
  for (idx = 0; idx < nfactor; idx++)
    printf (" %d,", factor[idx].divisor);
  printf (" }\n");
}
/* Entry point: gen-psqr <limbbits> <nailbits>.

   Parses the two numeric arguments, rejects combinations where the limb
   size is not positive or the nail size is negative or not smaller than
   the limb size, then builds the tables and prints the header on stdout.
   Exits with status 1 on a usage or validation error, returns 0 otherwise.  */
int
main (int argc, char *argv[])
{
  int limb_bits;
  int nail_bits;

  /* Exactly two arguments are required after the program name.  */
  if (argc != 3)
    {
      fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
      exit (1);
    }

  limb_bits = atoi (argv[1]);
  nail_bits = atoi (argv[2]);

  /* Valid means: limb_bits > 0 and 0 <= nail_bits < limb_bits.  */
  if (! (limb_bits > 0 && nail_bits >= 0 && nail_bits < limb_bits))
    {
      fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
               limb_bits, nail_bits);
      exit (1);
    }

  /* Compute the data first, then emit it.  */
  generate_sq_res_0x100 (limb_bits);
  generate_mod (limb_bits, nail_bits);
  print (limb_bits, nail_bits);

  return 0;
}

View file

@ -1,183 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="gen-psqr"
ProjectGUID="{0412953E-06CE-4A60-9DCD-CA5CAC3A46CC}"
RootNamespace="genpsqr"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\..\"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="false"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath=".\gen-psqr.c"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

File diff suppressed because it is too large Load diff

View file

@ -1,182 +0,0 @@
/* Declarations for getopt.
Copyright (C) 1989-1994, 1996-1999, 2001 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#ifndef _GETOPT_H
#ifndef __need_getopt
# define _GETOPT_H 1
#endif
/* If __GNU_LIBRARY__ is not already defined, either we are being used
standalone, or this is the first header included in the source file.
If we are being used with glibc, we need to include <features.h>, but
that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
not defined, include <ctype.h>, which will pull in <features.h> for us
if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
doesn't flood the namespace with stuff the way some other headers do.) */
#if !defined __GNU_LIBRARY__
# include <ctype.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* For communication from `getopt' to the caller.
When `getopt' finds an option that takes an argument,
the argument value is returned here.
Also, when `ordering' is RETURN_IN_ORDER,
each non-option ARGV-element is returned here. */
extern char *optarg;
/* Index in ARGV of the next element to be scanned.
This is used for communication to and from the caller
and for communication between successive calls to `getopt'.
On entry to `getopt', zero means this is the first call; initialize.
When `getopt' returns -1, this is the index of the first of the
non-option elements that the caller should itself scan.
Otherwise, `optind' communicates from one call to the next
how much of ARGV has been scanned so far. */
extern int optind;
/* Callers store zero here to inhibit the error message `getopt' prints
for unrecognized options. */
extern int opterr;
/* Set to an option character which was unrecognized. */
extern int optopt;
#ifndef __need_getopt
/* Describe the long-named options requested by the application.
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
of `struct option' terminated by an element containing a name which is
zero.
The field `has_arg' is:
no_argument (or 0) if the option does not take an argument,
required_argument (or 1) if the option requires an argument,
optional_argument (or 2) if the option takes an optional argument.
If the field `flag' is not NULL, it points to a variable that is set
to the value given in the field `val' when the option is found, but
left unchanged if the option is not found.
To have a long-named option do something other than set an `int' to
a compiled-in constant, such as set a value from `optarg', set the
option's `flag' field to zero and its `val' field to a nonzero
value (the equivalent single-letter option character, if there is
one). For long options that have a zero `flag' field, `getopt'
returns the contents of the `val' field. */
/* One entry of the long-option table handed to getopt_long and
getopt_long_only. The table is terminated by an entry whose name is
zero. */
struct option
{
/* Long option name; declared const only for ANSI C and C++ compilers. */
# if (defined __STDC__ && __STDC__) || defined __cplusplus
const char *name;
# else
char *name;
# endif
/* One of no_argument (0), required_argument (1) or optional_argument (2). */
/* has_arg can't be an enum because some compilers complain about
type mismatches in all the code that assumes it is an int. */
int has_arg;
/* If not NULL, the variable that is set to `val' when the option is
found; it is left unchanged when the option is not found. */
int *flag;
/* Value stored through `flag', or returned by `getopt' when `flag'
is zero. */
int val;
};
/* Names for the values of the `has_arg' field of `struct option'. */
# define no_argument 0
# define required_argument 1
# define optional_argument 2
#endif /* need getopt */
/* Get definitions and prototypes for functions to process the
arguments in ARGV (ARGC of them, minus the program name) for
options given in OPTS.
Return the option character from OPTS just read. Return -1 when
there are no more options. For unrecognized options, or options
missing arguments, `optopt' is set to the option letter, and '?' is
returned.
The OPTS string is a list of characters which are recognized option
letters, optionally followed by colons, specifying that that letter
takes an argument, to be placed in `optarg'.
If a letter in OPTS is followed by two colons, its argument is
optional. This behavior is specific to the GNU `getopt'.
The argument `--' causes premature termination of argument
scanning, explicitly telling `getopt' that there are no more
options.
If OPTS begins with `-', then non-option arguments are treated as
arguments to the option '\1'. This behavior is specific to the GNU
`getopt'. */
/* Function declarations. ANSI C and C++ compilers get full prototypes;
older compilers get declarations without parameter lists. */
#if (defined __STDC__ && __STDC__) || defined __cplusplus
# ifdef __GNU_LIBRARY__
/* Many other libraries have conflicting prototypes for getopt, with
differences in the consts, in stdlib.h. To avoid compilation
errors, only prototype getopt for the GNU C library. */
extern int getopt (int ___argc, char *const *___argv, const char *__shortopts);
# else /* not __GNU_LIBRARY__ */
/* Declared without a parameter list so it cannot clash with a
prototype supplied by another library. */
extern int getopt ();
# endif /* __GNU_LIBRARY__ */
/* The long-option entry points are skipped when the includer defined
__need_getopt, i.e. asked for plain getopt only. */
# ifndef __need_getopt
extern int getopt_long (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
extern int getopt_long_only (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
/* Internal only. Users should not call this directly. */
extern int _getopt_internal (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind,
int __long_only);
# endif
#else /* not __STDC__ */
/* Pre-ANSI compilers: same set of functions, declared without
parameter lists. */
extern int getopt ();
# ifndef __need_getopt
extern int getopt_long ();
extern int getopt_long_only ();
extern int _getopt_internal ();
# endif
#endif /* __STDC__ */
#ifdef __cplusplus
}
#endif
/* Make sure we later can get all the definitions and declarations. */
#undef __need_getopt
#endif /* getopt.h */

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,8 +0,0 @@
:: Generate the size-dependent table sources/headers for one build setup.
::   %1 = output directory; created when it does not exist yet
::   %2 = bit size handed to every generator (reported below as "a %2 bit build")
:: Every generator is also given a trailing 0. For gen-psqr the two numbers
:: are <limbbits> <nailbits>; presumably the same holds for the other
:: generators - confirm against their usage messages.
:: The generator executables are expected one directory above the current one.
:: Each output is written only when the file is missing, so an existing
:: (possibly stale or truncated) file is never regenerated; delete it by
:: hand to force a rebuild.
echo configuring for a %2 bit build
if not exist %1 (echo creating directory %1 && md %1)
:: Number-base tables: header and table variants from the same tool.
if not exist %1\mp_bases.h (echo creating %1\mp_bases.h && ..\gen-bases header %2 0 >%1\mp_bases.h)
if not exist %1\mp_bases.c (echo creating %1\mp_bases.c && ..\gen-bases table %2 0 >%1\mp_bases.c)
if not exist %1\fac_ui.h (echo creating %1\fac_ui.h && ..\gen-fac_ui %2 0 >%1\fac_ui.h)
:: gen-fib likewise produces a header and a table file.
if not exist %1\fib_table.h (echo creating %1\fib_table.h && ..\gen-fib header %2 0 >%1\fib_table.h)
if not exist %1\fib_table.c (echo creating %1\fib_table.c && ..\gen-fib table %2 0 >%1\fib_table.c)
if not exist %1\perfsqr.h (echo creating %1\perfsqr.h && ..\gen-psqr %2 0 >%1\perfsqr.h)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,895 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="lib_gmpxx"
ProjectGUID="{C82A62DB-DDB4-4072-832F-6DD841C6D80E}"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\;&quot;..\$(PlatformName)\&quot;"
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;_WIN32;HAVE_CONFIG_H"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
UsePrecompiledHeader="0"
ProgramDataBaseFileName="$(OutDir)\gmpxx.pdb"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
AdditionalDependencies="&quot;..\lib\$(OutDir)\gmp.lib&quot;"
OutputFile="$(OutDir)\gmpxx.lib"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="copy &quot;$(TargetDir)$(TargetName).lib&quot; &quot;..\lib\$(OutDir)\&quot;&#x0D;&#x0A;"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\;&quot;..\$(PlatformName)\&quot;"
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;_WIN32;HAVE_CONFIG_H"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
UsePrecompiledHeader="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
CompileAs="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
AdditionalDependencies="&quot;..\lib\$(OutDir)\gmp.lib&quot;"
OutputFile="$(OutDir)\gmpxx.lib"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="copy &quot;$(TargetDir)$(TargetName).lib&quot; &quot;..\lib\$(OutDir)\&quot;&#x0D;&#x0A;"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="3"
AdditionalIncludeDirectories="..\..\;&quot;..\$(PlatformName)\&quot;"
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;_WIN32;HAVE_CONFIG_H"
RuntimeLibrary="0"
UsePrecompiledHeader="0"
ProgramDataBaseFileName="$(OutDir)\gmpxx.pdb"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
AdditionalDependencies="&quot;..\lib\$(OutDir)\gmp.lib&quot;"
OutputFile="$(OutDir)\gmpxx.lib"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="copy &quot;$(TargetDir)$(TargetName).lib&quot; &quot;..\lib\$(OutDir)\&quot;&#x0D;&#x0A;"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="3"
AdditionalIncludeDirectories="..\..\;&quot;..\$(PlatformName)\&quot;"
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;_WIN32;HAVE_CONFIG_H"
RuntimeLibrary="0"
UsePrecompiledHeader="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
CompileAs="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
AdditionalDependencies="&quot;..\lib\$(OutDir)\gmp.lib&quot;"
OutputFile="$(OutDir)\gmpxx.lib"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="copy &quot;$(TargetDir)$(TargetName).lib&quot; &quot;..\lib\$(OutDir)\&quot;&#x0D;&#x0A;"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\cxx\isfuns.cc"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\cxx\ismpf.cc"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\cxx\ismpq.cc"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\cxx\ismpz.cc"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\cxx\ismpznw.cc"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\cxx\osdoprnti.cc"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\cxx\osfuns.cc"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\cxx\osmpf.cc"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\cxx\osmpq.cc"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\cxx\osmpz.cc"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="1"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
DebugInformationFormat="3"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\;..\..\;..\..\mpn\generic"
PreprocessorDefinitions="_WIN32"
RuntimeLibrary="0"
AssemblerListingLocation="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
XMLDocumentationFileName=""
/>
</FileConfiguration>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\config.h"
>
</File>
<File
RelativePath="..\..\gmp-impl.h"
>
</File>
<File
RelativePath="..\gmp.h"
>
</File>
<File
RelativePath="..\..\gmpxx.h"
>
</File>
<File
RelativePath="..\..\longlong.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

File diff suppressed because it is too large Load diff

View file

@ -1,37 +0,0 @@
/* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.
Copyright 1994, 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "gmp.h"
#include "gmp-impl.h"
/* First pass: pull in the shared popham.c source with the popcount variant
   selected and the hamdist variant switched off. */
#define OPERATION_popcount 1
#define OPERATION_hamdist 0
#include "..\mpn\generic\popham.c"
/* Clear the selectors before the second inclusion.  FNAME and POPHAM are
   presumably defined inside popham.c itself -- confirm against that file. */
#undef OPERATION_popcount
#undef OPERATION_hamdist
#undef FNAME
#undef POPHAM
/* Second pass: same source again, this time with the hamdist variant
   selected. */
#define OPERATION_popcount 0
#define OPERATION_hamdist 1
#include "..\mpn\generic\popham.c"

View file

@ -1,390 +0,0 @@
=====================================================
A. Compiling GMP and MPFR with the Visual Studio 2005
=====================================================
These VC++ build projects are based on GMP 4.2.1 and MPFR 2.2.1.
Some files in GMP 4.2.1 need to be modified to compile with VC++.
These have been moved into the vc8 build directory to avoid
making changes to GMP files. But this means that these files may
need to be updated if the related GMP files change. Building
MPFR is optional as this is no longer distributed with GMP.
STEP ONE
========
First obtain the GMP and MPFR distributions, named gmp-<ver> and
mpfr-<ver> where <ver> are the versions being used. Unzip the
gmp-<ver> files in the ZIP archive into a directory tree with the
gmp-<ver> directory as its root. Unzip the MPFR files so that
the root mpfr-<ver> directory is within the gmp-<ver> directory.
That is, the directories mpfr and mpfr-<ver> are at the same level
in the directory tree. Then delete the mpfr directory and rename
the directory mpfr-<ver> to mpfr. You should then apply any patches
that are needed for either or both GMP and MPFR. Then rename the
mparam_h.in file in the mpfr distribution to mparam.h.
Unzip the files in this distribution so that they are merged into
the above directory tree with the directories
build.vc8 -- build files for gmp and mpfr
mpn/x86i -- the YASM x86 assembler files (Pentium family)
mpn/amd64i -- the YASM x64 assembler files (AMD64)
within the gmp root directory gmp-<ver>.
STEP TWO
========
If you wish to use the assembler files you will also need the YASM
open source x86 assembler (r1438 or later) for Windows which can be
obtained from:
http://www.tortall.net/projects/yasm/
This assembler should be placed in the bin directory used by VC++,
which, for Visual Studio 2005, is typically:
C:\Program Files (x86)\Microsoft Visual Studio 8\VC\bin
You will also need to move the yasm.rules file from this distribution
into the directory where Visual Studio 2005 expects to find it, which
is typically:
C:\Program Files (x86)\Microsoft Visual Studio 8\VC\VCProjectDefaults
Alternatively you can configure the path for rules files in the VC++
configuration dialogue.
The NASM assembler is no longer supported as it cannot assemble 64-bit
instructions and also has problems with include file directory handling.
STEP THREE
==========
Visual Studio 2005 can be started for building the 32 or 64 bit versions
of GMP and MPFR by clicking on the *.sln file in the build.vc8 directory.
GMP and MPFR are built using the appropriate build projects. Select the
desired library and then set the desired configuration:
win32 or x64
release or debug
To build GMP dynamic link libraries (DLLs) choose one (or more) of:
dll_gmp_amd64 - GMP DLL using AMD64 assembler (x64)
dll_gmp_gc - GMP DLL using generic C (win32 & x64)
dll_gmp_p0 - GMP DLL using Pentium assembler (win32)
dll_gmp_p3 - GMP DLL using Pentium III assembler (win32)
dll_gmp_p4 - GMP DLL using Pentium IV assembler (win32)
To build GMP static libraries choose one (or more) of:
lib_gmp_amd64 - GMP library using AMD64 assembler (x64)
lib_gmp_gc - GMP library using generic C (win32 & x64)
lib_gmp_p0 - GMP library using Pentium assembler (win32)
lib_gmp_p3 - GMP library using Pentium III assembler (win32)
lib_gmp_p4 - GMP library using Pentium IV assembler (win32)
Before any of these libraries is built the appropriate GMP configuration
file is automatically copied into config.h. After a static library is
built it is then copied to the file gmp.lib in the 'lib' sub-directory
within the VC++ solution folder (build.vc8). Similarly, when a DLL is built,
the resulting DLL, its export libraries and its debug symbol file are
copied to the files gmp.dll, gmp.exp, gmp.lib and gmp.pdb within the
'dll' sub-directory.
This means that the 'dll' and 'lib' sub-directories respectively contain
the last GMP DLLs and static libraries built. These are then the libraries
used to build the MPFR and GMPXX libraries described later.
The GMP DLL projects include the C++ files. If you do not want these, the
relevant files need to be excluded from the DLL(s) you want to build. Go
to the 'cpp' subdirectory of their build project in the IDE and exclude all
the files in this subdirectory from the build process.
All the DLLs and static libraries are multi-threaded and are linked to the
multi-threaded Microsoft run-time libraries (DLLs are linked to DLL run time
libraries and static libraries are linked to run time static libraries).
Within the 'dll' and 'lib' sub-directories used for output the structure is:
DLL or LIB
Win32
Release
Debug
x64
Release
Debug
in order to enable the appropriate library for the desired target
platform to be located.
STEP FOUR
=========
After a GMP library has been built, other libraries can be built.
These always use the last GMP library (of the same type) that has
been built.
To build the MPFR DLL use:
dll_mpfr - MPFR DLL using generic C (win32 & x64)
To build the MPFR static library use:
lib_mpfr - MPFR static library (win32 & x64)
To build the GMP C++ library wrapper use:
lib_gmpxx - GMP C++ wrapper static library (win32 & x64)
The MPFR static library build assumes that this is intended to work with
the lib_gmp or lib_gmpxx static libraries.
If more than one gmp DLL is built, please remember that when the MPFR DLL
is built it will be linked to the last gmp DLL that is built. Alternatively
you can edit the MPFR linker property page to link to a specific export
library. The debug versions of these DLLs and libraries are built in the
same way.
STEP FIVE (Tests)
=================
All the remaining projects are for GMP testing. In Visual Studio 2005 these
are in the Tests project folder and its sub-folders but Visual C++ Express
doesn't support project folders so they have to be identified manually.
These tests cover only GMP at the moment and must be built and run manually.
The tests can only be built with the static libraries because they use
internal symbols that are not exported by the DLLs.
=====================
B. Using GMP and MPFR
=====================
Many applications that rely on GMP and MPFR also include the gmp.h
and mpfr.h header files but it is impossible to be certain that
these will match the versions of GMP and MPFR that are built by
this distribution. Hence when this distribution is being used
with a GMP based application it is important to ensure that any
GMP and MPFR header files used by such an application are those
that are supplied here and not those that might have been supplied
with the application itself. This will often be only gmp.h and
mpfr.h but some other header files may also be involved.
The static libraries and DLLs built here use the _cdecl calling
convention in which exported symbols have their C names prefixed
with an extra '_' character. Some applications expect the _stdcall
convention to be used in which there is an underscore prefix and a
suffix of '@n' where n is the number of bytes used for the function
arguments on the stack. Such applications will need to be modified
to work with the GMP and MPFR DLLs and libraries provided here. The
alternative of attempting to build GMP and MPFR using the _stdcall
convention is not recommended (and won't work with the assembler
based builds anyway). This is further complicated if the builds for
x64 are used since the conventions here are different once again.
1. Using the Static Libraries
=============================
To build a GMP C or C++ based application using the static
libraries all that needs to be done is to add the GMP or GMPXX
static libraries to the application build process. To build an
MPFR based application add the MPFR library and the GMP or GMPXX
library as appropriate.
It is, of course, important to ensure that any libraries that are
used have been built for the target platform.
2. Using the DLL Export Libraries
=================================
There are two ways of linking to a DLL. The first way is to use
one or more of the DLL export libraries built as described earlier
(note that these are not the same as static libraries although
they are used in a similar way when an application is built).
If you intend to use the DLL export libraries in an application
you need to:
a. ensure that the application can locate the GMP and/or
the MPFR DLLs in question when it is run. This involves
putting the DLL(s) on a recognised directory path.
b. define __GMP_LIBGMP_DLL and/or __MPFR_LIBGMP_DLL when
the application is built in order to ensure that GMP
and/or MPFR symbols, which are DLL export symbols, are
properly recognised as such.
3. Using DLL Dynamic loading
============================
The second way of linking to a DLL is to use dynamic loading.
This is more complex and will not be discussed here. The VC++
documentation describes how to use DLLs in this way.
==============================
KNOWN BUILD ISSUES FOR VC++ v8
==============================
1. A few test files require minor modifications to compile in this
build as follows:
File: tests\tests.h
25 25
26 26 #include "config.h"
27 27
28 28 #include <setjmp.h> /* for jmp_buf */
29 29
30 30 #if defined (__cplusplus)
--------------------------------------------
31 using namespace std; /* BRG */
--------------------------------------------
31 32 extern "C" {
32 33 #endif
--------------------------------------------
33
--------------------------------------------
34 34
35 35 #ifdef __cplusplus
36 36 #define ANYARGS ...
37 37 #else
38 38 #define ANYARGS
39 39 #endif
--------------------------------------------
File: tests\misc.c
24 24 #include <ctype.h>
25 25 #include <signal.h>
26 26 #include <stdio.h>
27 27 #include <stdlib.h> /* for getenv */
28 28 #include <string.h>
29 29
------------------------------------------------------------------------
30 #if HAVE_FLOAT_H
------------------------------------------------------------------------
30 #if HAVE_FLOAT_H || defined( _MSC_VER ) /* BRG */
------------------------------------------------------------------------
31 31 #include <float.h> /* for DBL_MANT_DIG */
32 32 #endif
33 33
34 34 #if TIME_WITH_SYS_TIME
35 35 # include <sys/time.h> /* for struct timeval */
36 36 # include <time.h>
------------------------------------------------------------------------
------------------------------------------------------------------------
474 474 case 1: rc = 3; break; /* tozero */
475 475 case 2: rc = 2; break; /* up */
476 476 case 3: rc = 1; break; /* down */
477 477 default:
478 478 return 0;
479 479 }
------------------------------------------------------------------------
480 #if defined( _MSC_VER )
481 { unsigned int cw;
482 _controlfp_s(&cw, 0, 0);
483 _controlfp_s(&cw, (cw & ~0xC00) | (rc << 10), _MCW_RC);
484 }
485 #else
------------------------------------------------------------------------
480 486 x86_fldcw ((x86_fstcw () & ~0xC00) | (rc << 10));
------------------------------------------------------------------------
487 #endif
------------------------------------------------------------------------
481 488 return 1;
482 489 #endif
483 490
484 491 return 0;
485 492 }
486 493
487 494 /* Return the hardware floating point rounding mode, or -1 if unknown. */
488 495 int
489 496 tests_hardware_getround (void)
490 497 {
491 498 #if HAVE_HOST_CPU_FAMILY_x86
------------------------------------------------------------------------
492 switch ((x86_fstcw () & ~0xC00) >> 10) {
------------------------------------------------------------------------
499 unsigned int cw;
500 #if defined( _MSC_VER )
501 _controlfp_s(&cw, 0, 0);
502 #else
503 cw = x86_fstcw();
504 #endif
505
506 switch ((cw & ~0xC00) >> 10) {
------------------------------------------------------------------------
493 507 case 0: return 0; break; /* nearest */
494 508 case 1: return 3; break; /* down */
495 509 case 2: return 2; break; /* up */
496 510 case 3: return 1; break; /* tozero */
497 511 }
498 512 #endif
------------------------------------------------------------------------
File: tests\mpz\t-perfsqr.c
23 23 #include <stdlib.h>
24 24
25 25 #include "gmp.h"
26 26 #include "gmp-impl.h"
27 27 #include "tests.h"
28 28
------------------------------------------------------------------------
29 #ifdef _MSC_VER /* BRG */
30 #include "perfsqr.h"
31 #else
------------------------------------------------------------------------
29 32 #include "mpn/perfsqr.h"
------------------------------------------------------------------------
30
------------------------------------------------------------------------
33 #endif
------------------------------------------------------------------------
31 34
32 35 /* check_modulo() exercises mpz_perfect_square_p on squares which cover each
33 36 possible quadratic residue to each divisor used within
34 37 mpn_perfect_square_p, ensuring those residues aren't incorrectly claimed
35 38 to be non-residues.
36 39
------------------------------------------------------------------------
File: tests\mpn\t-perfsqr.c
23 23 #include <stdlib.h>
24 24
25 25 #include "gmp.h"
26 26 #include "gmp-impl.h"
27 27 #include "tests.h"
28 28
------------------------------------------------------------------------
29 #ifdef _MSC_VER /* BRG */
30 #include "perfsqr.h"
31 #else
------------------------------------------------------------------------
29 32 #include "mpn/perfsqr.h"
------------------------------------------------------------------------
30
------------------------------------------------------------------------
33 #endif
------------------------------------------------------------------------
31 34
32 35 #define PERFSQR_MOD_MASK ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)
33 36
34 37 void
35 38 check_mod_2 (mp_limb_t d, mp_limb_t inv, mp_limb_t got_hi, mp_limb_t got_lo)
36 39 {
------------------------------------------------------------------------
================
Acknowledgements
================
My thanks to:
1. The GMP team for their work on GMP and the MPFR team for their work on MPFR
2. Sam Krasnik and Mike Loehr for suggestions on how to improve
and correct errors in earlier releases.
3. Patrick Pelissier and Vincent Lefèvre for helping to resolve
VC++ issues in MPFR.
Brian Gladman, December 2006

View file

@ -1,11 +0,0 @@
/* Small substitute header used under the name unistd.h by this build.
   NOTE(review): presumably exists because the MSVC toolchain ships no
   <unistd.h> -- confirm. */
#ifndef __UNISTD_H__
#define __UNISTD_H__
#include <io.h>
#include "getopt.h"
/* Any use of random() in code including this header becomes rand(). */
#define random rand
/* Supplies a value for SIGHUP so code that names it still compiles. */
#define SIGHUP 1
#endif

View file

@ -1,114 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<VisualStudioToolFile
Name="Yasm"
Version="8.00"
>
<Rules>
<!-- Build rule that assembles *.asm files with yasm.  Each [Name] token in
CommandLine is replaced by the switch of the property with that name, so a
property that is declared below but not listed here never reaches the
assembler.  [PreIncludeFile] is listed so that the "Pre Include File"
setting (yasm switch -P) actually takes effect. -->
<CustomBuildRule
Name="YASM"
DisplayName="Yasm Assembler"
CommandLine="yasm -Xvc -f $(PlatformName) [PreProc] [Parser] [Debug] [ListFileName] [Defines] [UnDefines] [IncludePaths] [PreIncludeFile] [ObjectFileName] [inputs]"
Outputs="[$ObjectFileName]"
FileExtensions="*.asm"
ExecutionDescription="Assembling $(InputFileName)"
ShowOnlyRuleProperties="false"
>
<Properties>
<StringProperty
Name="Defines"
DisplayName="Definitions"
Category="Pre-Defined Symbols"
Description="Specify pre-defined symbols (&apos;symbol&apos; or &apos;symbol = value&apos;) "
Switch="-D [value]"
Delimited="true"
Inheritable="true"
/>
<StringProperty
Name="IncludePaths"
DisplayName="Include Paths"
Category="Configuration"
Description="Set the paths for any additional include files"
Switch="-i &quot;[value]&quot;"
Delimited="true"
Inheritable="true"
/>
<StringProperty
Name="UnDefines"
DisplayName="Remove Definitions"
Category="Pre-Defined Symbols"
Description="Remove pre-defined symbols "
Switch="-u [value]"
Delimited="true"
Inheritable="true"
/>
<StringProperty
Name="ObjectFileName"
DisplayName="Object File Name"
Category="Output"
Description="Select the output file name"
Switch="-o &quot;[value]&quot;"
DefaultValue="&quot;$(IntDir)\$(InputName).obj&quot;"
/>
<StringProperty
Name="ListFileName"
DisplayName="List File Name"
Category="Output"
Description="Select an output listing by setting its file name"
Switch="-l &quot;[value]&quot;"
/>
<StringProperty
Name="PreIncludeFile"
DisplayName="Pre Include File"
Category="Configuration"
Description="Select a pre-included file by setting its name"
Switch="-P &quot;[value]&quot;"
/>
<BooleanProperty
Name="Debug"
DisplayName="Debug Information"
Category="Output"
Description="Generate debugging information"
Switch="-g cv8"
/>
<EnumProperty
Name="PreProc"
DisplayName="Pre-Processor"
Category="Configuration"
Description="Select the pre-processor (&apos;nasm&apos; or &apos;raw&apos;)"
>
<Values>
<EnumValue
Value="0"
Switch="-rnasm"
DisplayName="Nasm "
/>
<EnumValue
Value="1"
Switch="-rraw"
DisplayName="Raw"
/>
</Values>
</EnumProperty>
<EnumProperty
Name="Parser"
DisplayName="Parser"
Category="Configuration"
Description="Select the parser for Intel (&apos;nasm&apos;) or AT&amp;T ( &apos;gas&apos;) syntax"
>
<Values>
<EnumValue
Value="0"
Switch="-pnasm"
DisplayName="Nasm"
/>
<EnumValue
Value="1"
Switch="-pgas"
DisplayName="Gas"
/>
</Values>
</EnumProperty>
</Properties>
</CustomBuildRule>
</Rules>
</VisualStudioToolFile>

View file

@ -1,54 +0,0 @@
; Stack-frame helper macros.
;
; The symbol 'frame' keeps track of how much stack space is
; being used locally in order to be able to track where the
; routine parameters are relative to the current value of
; the stack pointer in rsp.

; f_push reg[, reg ...]
; Save registers on the stack and adjust 'frame' accordingly: each
; argument is pushed in the order given and 'frame' grows by 8 per push.
%macro f_push 1-*
%rep %0
push %1
%rotate 1
%assign frame frame + 8
%endrep
%endmacro
; f_pop reg  (single-argument form)
; Restore a register from the stack and adjust 'frame' accordingly
; ('frame' shrinks by 8).
%macro f_pop 1
%rep %0
%rotate -1
pop %1
%endrep
%assign frame frame - 8
%endmacro
; f_pop reg, reg[, ...]  (two or more arguments)
; Restore registers from the stack at the end of a routine where
; 'frame' does not need to be adjusted because it won't be used
; again.  The '%rotate -1' before each pop means the arguments are
; popped last-to-first, i.e. in the reverse of the listed order.
%macro f_pop 2-*
%rep %0
%rotate -1
pop %1
%endrep
%endmacro
; f_add bytes
; Obtain some local space on the stack: rsp is lowered by the given
; amount and 'frame' is increased by the same amount.
%macro f_add 1
sub rsp,%1
%assign frame frame + %1
%endmacro
; f_sub bytes
; Return local space on the stack at the end of a routine without the
; need to adjust 'frame'.
%macro f_sub 1
add rsp,%1
%endmacro
; 'frame' starts at zero; x86_regs is a ready-made register list.
%assign frame 0
%define x86_regs rbx,rsi,rdi,rbp

View file

@ -1,173 +0,0 @@
;
; AMD64 mpn_add_n/mpn_sub_n -- mpn add or subtract.
;
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
;
; Calling interface:
;
; mp_limb_t __gmpn_<op>_n( <op> = add OR sub
; mp_ptr dst, rcx
; mp_srcptr src1, rdx
; mp_srcptr src2, r8
; mp_size_t len r9
; )
;
; mp_limb_t __gmpn_<op>_nc( <op> = add OR sub
; mp_ptr dst, rcx
; mp_srcptr src1, rdx
; mp_srcptr src2, r8
; mp_size_t len, r9
; mp_limb_t carry [rsp+0x28]
; )
;
; Calculate src1[size] plus(minus) src2[size] and store the result in
; dst[size]. The return value is the carry bit from the top of the result
; (1 or 0). The _nc version accepts 1 or 0 for an initial carry into the
; low limb of the calculation. Note values other than 1 or 0 here will
; lead to garbage results.
%define dst rcx ; destination pointer
%define sr1 rdx ; source 1 pointer
%define sr2 r8 ; source 2 pointer
%define len r9 ; number of limbs
%define cry [rsp+0x28] ; carry value
%define r_jmp r10 ; temporary for jump table entry
%define r_cnt r11 ; temporary for loop count
%define UNROLL_LOG2 4
%define UNROLL_COUNT (1 << UNROLL_LOG2)
%define UNROLL_MASK (UNROLL_COUNT - 1)
%define UNROLL_BYTES (8 * UNROLL_COUNT)
%define UNROLL_THRESHOLD 8
%if UNROLL_BYTES >= 256
%error unroll count is too large
%elif UNROLL_BYTES >= 128
%define off 128
%else
%define off 0
%endif
; mac_sub prefix, op, name, name_c
;
; Emits two global entry points that share one body:
;   %1%3  - plain entry, starts with a carry of zero
;   %1%4  - carry-in entry, takes the initial carry from [rsp+0x28]
; %2 is the carrying instruction applied limb by limb (the file
; instantiates this macro with adc and with sbb).
; The return value in rax is the final carry flag (0 or 1).
%macro mac_sub 4
global %1%4
global %1%3
%ifdef DLL
export %1%4
export %1%3
%define PIC
%endif
; carry-in entry: load the caller-supplied carry into rax
%1%4:
mov rax,[rsp+0x28]
jmp %%0
; plain entry: carry = 0
%1%3:
xor rax,rax
%%0:
; the limb count is taken from the low 32 bits of r9 and sign-extended
movsxd len,r9d
cmp len,UNROLL_THRESHOLD
jae %%2
; --- short operands: simple one-limb-per-iteration loop ---
; point all three pointers past the end and count len up from -len to 0
lea sr1,[sr1+len*8]
lea sr2,[sr2+len*8]
lea dst,[dst+len*8]
neg len
; move bit 0 of rax (the initial carry) into the carry flag
shr rax,1
%%1:
mov rax,[sr1+len*8]
mov r10,[sr2+len*8]
%2 rax,r10
mov [dst+len*8],rax
; inc leaves the carry flag untouched, so the carry chains to the next limb
inc len
jnz %%1
; mov does not alter flags, so setc still sees the carry from the last %2
mov rax,dword 0
setc al
ret
; --- long operands: unrolled loop entered through a computed jump ---
%%2:
; remember whether the limb count is odd (len & 1) on the stack; that
; single leftover limb is handled after the unrolled loop
mov r_cnt,1
and r_cnt,len
push r_cnt
; round len down to an even count and derive the loop trip counter
and len,-2
mov r_cnt,len
dec r_cnt
shr r_cnt,UNROLL_LOG2
; (-len) & UNROLL_MASK selects the entry point inside the unrolled
; block; r_jmp starts as 4 times that value
neg len
and len,UNROLL_MASK
lea r_jmp,[len*4]
neg len
; bias the pointers so the first pass through the block lands on limb 0
lea sr1,[sr1+len*8+off]
lea sr2,[sr2+len*8+off]
lea dst,[dst+len*8+off]
; initial carry -> carry flag (the lea instructions below do not touch it)
shr rax,1
; r_jmp = %%3 + 3 * r_jmp; the PIC variant forms the address of %%3
; rip-relative instead of using it as an absolute displacement
%ifdef PIC
lea r_jmp,[r_jmp+r_jmp*2]
lea rax,[%%3 wrt rip]
lea r_jmp,[r_jmp+rax]
%else
lea r_jmp,[r_jmp+r_jmp*2+%%3]
%endif
jmp r_jmp
%%3:
; unrolled body: each repetition processes CHUNK_COUNT (2) limbs using
; byte-sized displacements relative to the biased pointers
%define CHUNK_COUNT 2
%assign i 0
%rep UNROLL_COUNT / CHUNK_COUNT
%assign disp0 8 * i * CHUNK_COUNT - off
mov r_jmp,[byte sr1+disp0] ; len and r_jmp registers
mov len,[byte sr1+disp0+8] ; now not needed
%2 r_jmp,[byte sr2+disp0]
mov [byte dst+disp0],r_jmp
%2 len,[byte sr2+disp0+8]
mov [byte dst+disp0+8],len
%assign i i + 1
%endrep
; dec and lea leave the carry flag intact across iterations
dec r_cnt
lea sr1,[sr1+UNROLL_BYTES]
lea sr2,[sr2+UNROLL_BYTES]
lea dst,[dst+UNROLL_BYTES]
jns %%3
; recover the odd-limb flag saved above; if it was 0, dec makes it
; negative and the final single limb is skipped
pop rax
dec rax
js %%5
mov len,[sr1-off]
%2 len,[sr2-off]
mov [dst-off],len
; return the carry flag as 0 or 1 in rax
%%5:mov rax,dword 0
setc al
ret
%endmacro
bits 64
text
mac_sub __g,adc,mpn_add_n,mpn_add_nc
mac_sub __g,sbb,mpn_sub_n,mpn_sub_nc
end

View file

@ -1,214 +0,0 @@
;
; AMD64 mpn_add_n/mpn_sub_n -- mpn add or subtract.
;
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
;
; Calling interface:
;
; mp_limb_t __gmpn_<op>mul_1( <op> = add or sub
; mp_ptr dst, rcx
; mp_srcptr src, rdx
; mp_size_t size, r8
; mp_limb_t mult r9
; )
;
; mp_limb_t __gmpn_<op>mul_1c(
; mp_ptr dst, rcx
; mp_srcptr src, rdx
; mp_size_t size, r8
; mp_limb_t mult, r9
; mp_limb_t carry [rsp+0x28]
; )
;
; Calculate src[size] multiplied by mult[1] and add to /subtract from dst[size] and
; return the carry or borrow from the top of the result
%define dst rcx
%define len r8
%define mlt r9
%define src r10
%define cry r11
%define UNROLL_LOG2 4
%define UNROLL_COUNT (1 << UNROLL_LOG2)
%define UNROLL_MASK (UNROLL_COUNT - 1)
%define UNROLL_BYTES (8 * UNROLL_COUNT)
%define UNROLL_THRESHOLD 9
%if UNROLL_BYTES >= 256
%error unroll count is too large
%elif UNROLL_BYTES >= 128
%define off 128
%else
%define off 0
%endif
; mac_sub prefix, op, name, name_c
;
; Emits two global entry points that share the multi-limb code:
;   %1%3  - plain entry, initial carry limb = 0
;   %1%4  - carry-in entry, initial carry limb read from [rsp+0x28]
; %2 is the instruction that combines each product limb with the
; destination (the file instantiates this macro with add and with sub).
; The value returned in rax is the final carry/borrow limb.
%macro mac_sub 4
global %1%3
global %1%4
%ifdef DLL
export %1%3
export %1%4
%define PIC
%endif
; plain entry
%1%3:
movsxd len,r8d
mov src,rdx ; source ptr
xor cry,cry ; carry = 0
dec len ; test for one limb only
jnz %%0 ; if more than one
mov rax,[src] ; get limb value
mul mlt ; rax * mlt -> rdx (hi), rax (lo)
%2 [dst],rax ; add/sub from destination
adc rdx,byte 0 ; add any carry into high word
mov rax,rdx ; and return the carry value
ret
; carry-in entry
%1%4:
movsxd len,r8d
mov src,rdx ; source pointer
mov cry,[rsp+0x28] ; carry value
dec len ; test for one limb
jnz %%0 ; if more than one
mov rax,[src] ; get limb value
mul mlt ; rax * mlt -> rdx (hi), rax (lo)
add rax,cry ; add in input carry
adc rdx,byte 0 ; propagate it into rdx
%2 [dst],rax ; add or subtract rax from dest limb
adc rdx,byte 0 ; propagate carry into high word
mov rax,rdx
ret
; common path for two or more limbs; on entry len holds (limb count - 1)
%%0:
cmp len,byte UNROLL_THRESHOLD
mov rax,[src] ; first limb of source
ja %%2 ; unroll for many limbs
; --- simple loop: len counts up from -(count - 1) to 0 ---
lea src,[src+len*8+8] ; next source limb
lea dst,[dst+len*8] ; current dst limb
neg len
%%1:
mul mlt ; multiply current src limb -> rdx, rax
add rax,cry ; add in carry
adc rdx,byte 0 ; propagate carry into rdx
%2 [dst+len*8],rax ; add or subtract rax from dest limb
mov rax,[src+len*8] ; get next source limb
adc rdx,byte 0 ; add carry or borrow into high word
inc len ; go to next limb
mov cry,rdx ; high word -> carry
jnz %%1
; the last source limb was loaded by the final loop iteration
mul mlt ; one more limb to do
add rax,cry
adc rdx,byte 0
%2 [dst],rax
adc rdx,byte 0
mov rax,rdx ; return carry value as a limb
ret
; --- unrolled path: uses three extra registers, saved and restored here ---
%define jmp_val rbp ; jump into code sequence
%define rep_cnt rbx ; repeats for full sequence
%define cry_hi rsi ; second carry for alternate block
%%2:
push rbp
push rbx
push rsi
lea rep_cnt,[len-2]
dec len
shr rep_cnt,UNROLL_LOG2
neg len
and len,UNROLL_MASK
; entry offset into the unrolled block = 4*len + 16*len bytes
mov jmp_val,len
mov cry_hi,len ; cry_hi and jmp_val are temporary
shl jmp_val,2 ; values for calculating the jump
shl cry_hi,4 ; offset into the unrolled code
; the PIC variant forms the address of %%3 rip-relative instead of
; using it as an absolute displacement
%ifdef PIC
lea cry_hi,[cry_hi+jmp_val]
lea jmp_val,[%%3 wrt rip]
lea jmp_val,[jmp_val+cry_hi]
%else
lea jmp_val,[cry_hi+jmp_val+%%3]
%endif
neg len
; product of the first source limb (already in rax) seeds the two carries
mul mlt
add cry,rax ; initial carry, becomes low carry
adc rdx,byte 0
mov cry_hi,rdx
; the flags set by this test are consumed by the cmovnz pair below;
; the mov and lea instructions in between do not modify flags
test len,1
mov rax,[src+8] ; src second limb
lea src,[src+len*8+off+16]
lea dst,[dst+len*8+off]
cmovnz cry_hi,cry ; high, low carry other way around
cmovnz cry,rdx
; len is zeroed and then used as a constant-zero register in the block
xor len,len
jmp jmp_val
%%3:
; unrolled body: each repetition handles CHUNK_COUNT (2) limbs and
; alternates between cry and cry_hi as the running carry
%define CHUNK_COUNT 2
%assign i 0
%rep UNROLL_COUNT / CHUNK_COUNT
%assign disp0 8 * i * CHUNK_COUNT - off
mul mlt
%2 [byte dst+disp0],cry
mov cry,len ; len = 0
adc cry_hi,rax
mov rax,[byte src+disp0]
adc cry,rdx
mul mlt
%2 [byte dst+disp0+8],cry_hi
mov cry_hi,len ; len = 0
adc cry,rax
mov rax,[byte src+disp0+8]
adc cry_hi,rdx
%assign i i + 1
%endrep
dec rep_cnt
lea src,[src+UNROLL_BYTES]
lea dst,[dst+UNROLL_BYTES]
jns %%3
; tail: fold in the last product and both outstanding carries
mul mlt
%2 [dst-off],cry
adc rax,cry_hi
adc rdx,len
%2 [dst-off+8],rax
adc rdx,len
; return the final carry limb and restore the saved registers
mov rax,rdx
pop rsi
pop rbx
pop rbp
ret
%endmacro
bits 64
text
mac_sub __g,add,mpn_addmul_1,mpn_addmul_1c
mac_sub __g,sub,mpn_submul_1,mpn_submul_1c
end

View file

@ -1,103 +0,0 @@
; AMD64 mpn_copyd -- decrementing copy limb vector
;
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Provided by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
;
; Calling interface:
;
; void mpn_copyd(
; mp_ptr dst, rcx
; mp_srcptr src, rdx
; mp_size_t size r8
; )
%define UNROLL_THRESHOLD 16
%define d_ptr rcx
%define s_ptr rdx
%define s_len r8
bits 64
text
global __gmpn_copyd
%ifdef DLL
export __gmpn_copyd
%endif
; __gmpn_copyd -- copy s_len limbs from s_ptr to d_ptr, working from the
; highest limb down to the lowest.
;
; Fewer than UNROLL_THRESHOLD limbs are copied one at a time through rax.
; Longer copies use 16-byte SSE2 moves when source and destination share
; the same alignment modulo 16, and 8-byte moves otherwise.
__gmpn_copyd:
movsxd s_len,r8d
cmp s_len,byte UNROLL_THRESHOLD
jge .2 ; if many limbs to move
dec s_len
jl .1 ; nothing to copy
.0: mov rax,[s_ptr+s_len*8] ; short move via rax
mov [d_ptr+s_len*8],rax
dec s_len
jge .0 ; avoid single byte ret that
.1: rep ret ; interferes with branch prediction
.2: mov rax,s_ptr ; find relative alignment of
xor rax,d_ptr ; source and destination (min
test al,8 ; 8-byte alignment assumed)
jnz .7 ; not 16 byte aligned
lea rax,[s_ptr+s_len*8]
test al,8 ; see if src is on 16 byte
jz .3 ; boundary
dec s_len
mov rax,[rax-8] ; if not do a one limb copy
mov [d_ptr+s_len*8],rax
.3: lea s_len,[s_len-4] ; now 16 byte aligned
.4: prefetchnta [s_ptr+s_len*8+16-3*64] ; should this be -4*64 ??
movdqa xmm0,[s_ptr+s_len*8+16] ; move 32 bytes at a time
movntdq [d_ptr+s_len*8+16],xmm0
movdqa xmm0,[s_ptr+s_len*8]
movntdq [d_ptr+s_len*8],xmm0
sub s_len,4
jge .4
sfence ; order the non-temporal stores above
; s_len is now (limbs still to copy) - 4, so bits 1 and 0 give the
; remaining count of 0..3 limbs
test s_len,2
jz .5
movdqa xmm0,[s_ptr+s_len*8+16] ; move 16 bytes if necessary
movdqa [d_ptr+s_len*8+16],xmm0
.5: test s_len,1
jz .6
movq xmm0,[s_ptr] ; move 8 bytes if necessary
movq [d_ptr],xmm0
.6: ret
.7: lea s_len,[s_len-2] ; move 8 bytes at a time
.8: movq xmm0,[s_ptr+s_len*8+8]
movq xmm1,[s_ptr+s_len*8]
movq [d_ptr+s_len*8+8],xmm0
movq [d_ptr+s_len*8],xmm1
sub s_len,2
jge .8
; s_len is now (limbs still to copy) - 2; an odd value means limb 0 is left
test s_len,1
jz .9
movq xmm0,[s_ptr]
movq [d_ptr],xmm0
.9: ret
end

View file

@ -1,107 +0,0 @@
; AMD64 mpn_copyi -- incrementing copy limb vector
;
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Provided by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
;
; Calling interface:
;
; void mpn_copyi(
; mp_ptr dst, rcx
; mp_srcptr src, rdx
; mp_size_t size r8
; )
; Tuning threshold and register aliases. The aliases name the argument
; registers listed in the calling interface (dst=rcx, src=rdx, size=r8).
%define UNROLL_THRESHOLD 16
%define d_ptr rcx
%define s_ptr rdx
%define s_len r8
bits 64
text
global __gmpn_copyi
%ifdef DLL
export __gmpn_copyi
%endif
; __gmpn_copyi -- copy s_len limbs (8 bytes each) from s_ptr to d_ptr in
; ascending address order.
;
; Both pointers are advanced to one past the end of their vectors and s_len
; is negated, so [ptr+s_len*8] walks upwards as s_len counts towards zero.
; Fewer than UNROLL_THRESHOLD limbs are copied one at a time through rax;
; longer vectors use SSE2 moves, with non-temporal stores when source and
; destination share the same 16-byte alignment.
; Modifies rax, rcx, rdx, r8, r9, xmm0 and xmm1. No return value.
__gmpn_copyi:
movsxd s_len,r8d ; only the low 32 bits of size are used (sign-extended)
or s_len,s_len ; none to move?
jz .1
mov rax,s_ptr ; find relative alignment of
xor rax,d_ptr ; source and destination (min
mov r9,s_ptr ; 8-byte alignment assumed); r9 keeps the original src
lea s_ptr,[s_ptr+s_len*8] ; s_ptr -> one past the last source limb
lea d_ptr,[d_ptr+s_len*8] ; d_ptr -> one past the last destination limb
neg s_len ; s_len = -size, counts up to zero
; Unsigned compare of -size against -UNROLL_THRESHOLD: the branch is taken
; when size >= UNROLL_THRESHOLD.
cmp s_len,byte -UNROLL_THRESHOLD
jbe .2 ; if many limbs to move
.0: mov rax,[s_ptr+s_len*8] ; short move via rax
mov [d_ptr+s_len*8],rax
inc s_len
jnz .0 ; avoid single byte ret that
.1: rep ret ; interferes with branch prediction
; Long copy. Bit 3 of (src xor dst) tells whether the two pointers can be
; 16-byte aligned at the same time.
.2: test al,8
jnz .7 ; not 16 byte aligned
test r9,8 ; see if src is on 16 byte
jz .3 ; boundary
mov rax,[s_ptr+s_len*8] ; if not do a one limb copy
mov [d_ptr+s_len*8],rax
inc s_len
; Bias the counter by 3 so the loop below can test "4 more limbs available"
; with a plain sign check; the -24 and -8 displacements undo the bias.
.3: lea s_len,[s_len+3] ; now 16 byte aligned
.4: prefetchnta [s_ptr+s_len*8-24+3*64] ; should this be +4*64 ??
movdqa xmm0,[s_ptr+s_len*8-24] ; move 32 bytes at a time
movntdq [d_ptr+s_len*8-24],xmm0 ; non-temporal store
movdqa xmm0,[s_ptr+s_len*8-8]
movntdq [d_ptr+s_len*8-8],xmm0
add s_len,4
jl .4
sfence ; order the non-temporal stores before continuing
; Here s_len is 0..3 and (3 - s_len) limbs remain to be copied.
test s_len,2
jnz .5 ; bit 1 set: fewer than two limbs left
movdqa xmm0,[s_ptr+s_len*8-24] ; move 16 bytes if necessary
movdqa [d_ptr+s_len*8-24],xmm0
add s_len,2
.5 test s_len,1
jnz .6 ; bit 0 set: nothing left
movq xmm0,[s_ptr+s_len*8-24] ; move 8 bytes if necessary
movq [d_ptr+s_len*8-24],xmm0
.6: ret
; Source and destination differ in 16-byte alignment: use 8-byte movq moves,
; two limbs per iteration. The counter is biased by 1 for the loop test.
.7: lea s_len,[s_len+1] ; move 8 bytes at a time
.8: movq xmm0,[s_ptr+s_len*8-8]
movq xmm1,[s_ptr+s_len*8]
movq [d_ptr+s_len*8-8],xmm0
movq [d_ptr+s_len*8],xmm1
add s_len,2
jl .8
test s_len,1
jnz .9 ; s_len == 1: the loop copied everything
movq xmm0,[s_ptr-8] ; s_len == 0: one final limb remains
movq [d_ptr-8],xmm0
.9: ret
end

View file

@ -1,160 +0,0 @@
;
; AMD64 mpn_divexact_1 -- mpn by limb exact division
;
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
;
; Calling interface:
;
; void mpn_divexact_1(
; mp_ptr dst, rcx
; mp_srcptr src, rdx
; mp_size_t size, r8
; mp_limb_t divisor r9
; )
;
; Since the inverse takes a while to set up, plain division is used for small
; sizes. Multiplying works out faster for size>=3 when the divisor is odd or
; size>=4 when the divisor is even.
bits 64
text
extern __gmp_modlimb_invert_table
global __gmpn_divexact_1
%ifdef DLL
export __gmpn_divexact_1
%endif
; __gmpn_divexact_1 -- divide the size-limb vector at src (rdx) by the limb
; divisor (r9) and store the size-limb quotient at dst (rcx).
;
; Small operands go through a plain div loop. Otherwise the divisor is split
; into a power of two (count kept in rcx/cl) and an odd part (r9), the odd
; part is inverted modulo 2^64 with Newton iterations seeded from
; __gmp_modlimb_invert_table, and each quotient limb is produced by one
; multiplication by the inverse, working from the least significant limb up.
; rsi and rdi are saved and restored around the multiply path.
__gmpn_divexact_1:
movsxd r8,r8d ; only the low 32 bits of size are used (sign-extended)
mov r10,rdx ; r10 = src (rdx is needed by div)
mov rax,r9
and rax,byte 1 ; 1 if the divisor is odd
add rax,r8
cmp rax,byte 4 ; size + (divisor odd) >= 4 ?
jae L_mul_by_inverse
; Plain division, most significant limb first; rdx carries the remainder
; from one limb to the next.
xor rdx,rdx
L_div_top:
mov rax,[r10+r8*8-8]
div r9
mov [rcx+r8*8-8],rax
dec r8
jnz L_div_top
rep ret ; avoid single byte return
L_mul_by_inverse:
push rsi
push rdi
mov rsi,rdx ; src pointer
mov rdi,rcx ; dst pointer
mov rax,r9
stc
sbb rcx,rcx ; -1 -> rcx, r11
mov r11,rcx
; Shift the divisor right until a 1 bit drops out. On exit rcx holds the
; number of trailing zero bits and rax the odd part shifted right once more.
L_strip_twos:
shr rax,1
inc rcx ; inc leaves the carry flag from shr untouched
jnc L_strip_twos
lea r9,[rax+rax+1] ; r9 = odd part of the divisor
and rax,byte 127 ; table index = (odd divisor / 2) mod 128
lea rdx,[__gmp_modlimb_invert_table wrt rip]
movzx rax,byte [rdx+rax] ; inv -> rax (8-bit approx)
; If f(x) = 0, then x[n+1] = x[n] - f(x) / f'(x) is Newton's iteration for a
; root. With f(x) = 1/x - v we obtain x[n + 1] = 2 * x[n] - v * x[n] * x[n]
; as an iteration for x = 1 / v. This provides quadratic convergence so
; that the number of bits of precision doubles on each iteration. The
; iteration starts with 8-bit precision.
lea edx, [rax+rax]
imul eax, eax
imul eax, r9d
sub edx, eax ; inv -> rdx (16-bit approx)
lea eax, [rdx+rdx]
imul edx, edx
imul edx, r9d
sub eax, edx ; inv -> rax (32-bit approx)
lea rdx, [rax+rax]
imul rax, rax
imul rax, r9
sub rdx, rax ; inv -> rdx (64-bit approx)
mov r8,r8 ; no-op as written
lea rsi,[rsi+r8*8] ; rsi -> one past the end of src
lea rdi,[rdi+r8*8] ; rdi -> one past the end of dst
neg r8 ; r8 = -size, counts up to zero
mov r10,rdx ; r10 = inverse of the odd divisor
xor r11,r11 ; r11 = borrow (0 or -1), initially none
mov rax,[rsi+r8*8] ; src[0]
or rcx,rcx ; any twos to strip from the source?
mov rdx,[rsi+r8*8+8] ; src[1]
jz L_odd_entry
shrd rax,rdx,cl ; low limb of (src >> twos)
inc r8
jmp L_even_entry
; Odd divisor: no shifting of the source is needed.
L_odd_top:
mul r9 ; rdx = high limb of (previous quotient limb * divisor)
mov rax,[rsi+r8*8]
sub rdx,r11 ; r11 is 0 or -1, so this adds the borrow
sub rax,rdx
sbb r11,r11 ; new borrow
L_odd_entry:
imul rax,r10 ; quotient limb = value * inverse (mod 2^64)
mov [rdi+r8*8],rax
inc r8
jnz L_odd_top
pop rdi
pop rsi
ret
; Even divisor: every source limb is first shifted right by cl bits, pulling
; in bits from the next higher limb.
L_even_top:
mul r9 ; rdx = high limb of (previous quotient limb * divisor)
sub rdx,r11 ; add the borrow (r11 is 0 or -1)
mov rax,[rsi+r8*8-8]
mov r11,[rsi+r8*8]
shrd rax,r11,cl
sub rax,rdx
sbb r11,r11 ; new borrow
L_even_entry:
imul rax,r10 ; quotient limb = value * inverse (mod 2^64)
mov [rdi+r8*8-8],rax
inc r8
jnz L_even_top
; Most significant limb: there is no higher limb to shift bits in from.
mul r9
mov rax,[rsi-8]
sub rdx,r11
shr rax,cl
sub rax,rdx
imul rax,r10
mov [rdi-8],rax
pop rdi
pop rsi
ret
end

View file

@ -1,46 +0,0 @@
/* Generic x86 gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#if !defined(GMP_MPARAM_H)
#define GMP_MPARAM_H

/* Limb width in bits: reject a conflicting earlier definition, otherwise
   supply the value for this target. */
#if defined(BITS_PER_MP_LIMB)
#if BITS_PER_MP_LIMB != 64
#error Bad configuration in gmp-mparam.h
#endif
#else
#define BITS_PER_MP_LIMB 64
#endif

/* Limb width in bytes, checked the same way. */
#if defined(BYTES_PER_MP_LIMB)
#if BYTES_PER_MP_LIMB != 8
#error Bad configuration in gmp-mparam.h
#endif
#else
#define BYTES_PER_MP_LIMB 8
#endif

/* Algorithm crossover points for multiplication and squaring. */
#define MUL_KARATSUBA_THRESHOLD 26
#define MUL_TOOM3_THRESHOLD 298
#define SQR_KARATSUBA_THRESHOLD 33

/* A threshold of 0 selects mpn_divexact_1 over mpn_divrem_1 always: the
   generic x86 mpn_divexact_1 is faster on all of p5, p6, k6 and k7. It is
   probably slower on 386 and 486, but that is accepted. */
#define DIVEXACT_1_THRESHOLD 0

#endif

View file

@ -1,85 +0,0 @@
; AMD64 mpn_lshift -- mpn left shift
;
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
;
; Calling interface:
;
; mp_limb_t mpn_lshift(
; mp_ptr dst, rcx
; mp_srcptr src, rdx
; mp_size_t size, r8
; unsigned shift r9
; )
; Register aliases. r_tmp shares r9 with the incoming shift count, so the
; count is moved to rcx/cl before r_tmp is first written.
%define s_len r8
%define r_tmp r9
%define d_ptr r10
%define s_ptr r11
bits 64
text
global __gmpn_lshift
%ifdef DLL
export __gmpn_lshift
%endif
; __gmpn_lshift -- shift the size-limb vector at src left by `shift` bits,
; store the result at dst, and return in rax the bits shifted out of the most
; significant limb (right-justified).
;
; Limbs are processed from the most significant down. The complementary
; count (64 - shift) is obtained by negating cl, relying on the hardware
; masking of 64-bit shift counts to 6 bits.
; NOTE(review): a shift of 0 would make both counts 0; presumably callers
; guarantee 1 <= shift <= 63 -- confirm against the mpn_lshift contract.
; If size is 0 the function returns immediately without setting rax.
__gmpn_lshift:
movsxd s_len,r8d ; only the low 32 bits of size are used (sign-extended)
or s_len,s_len
jz .0
mov d_ptr,rcx
mov s_ptr,rdx
mov rcx,r9 ; cl = shift count
cmp s_len,byte 2
jge .1
; Single limb.
or s_len,s_len
mov rax,[s_ptr]
mov r_tmp,rax
shl r_tmp,cl
neg cl ; cl = 64 - shift (mod 64)
mov [d_ptr],r_tmp
shr rax,cl ; return value: bits shifted out
.0: ret
; Two or more limbs. The original top limb is pushed so that the return
; value can be derived from it after the loop.
.1: dec s_len
mov rdx,[s_ptr+s_len*8] ; rdx = most significant limb
push rdx
; Loop invariant: rdx = limb s_len (unshifted); rax is loaded with limb
; s_len-1. Result limb s_len = (rdx << shift) | (rax >> (64 - shift)).
.2: mov rax,[s_ptr+s_len*8-8]
mov r_tmp,rax
shl rdx,cl
neg cl ; cl = 64 - shift
shr r_tmp,cl
neg cl ; cl = shift again
or r_tmp,rdx
mov rdx,rax
mov [d_ptr+s_len*8],r_tmp
dec s_len
jnz .2
shl rax,cl ; least significant limb takes in zero bits
mov [d_ptr],rax
neg cl ; cl = 64 - shift
pop rax ; original most significant limb
shr rax,cl ; return value: bits shifted out
ret

View file

@ -1,153 +0,0 @@
; AMD64 mpn_modexact_1_odd -- exact division style remainder.
; Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
; Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 51 Franklin Street,
; Fifth Floor, Boston, MA 02110-1301, USA.
;
; cycles/limb
; Hammer: 10
; Prescott/Nocona: 33
; mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
; mp_limb_t divisor);
; mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
; mp_limb_t divisor, mp_limb_t carry);
;
; The dependent chain in the main loop is
;
; cycles
; subq %r8, %rax 1
; imulq %r9, %rax 4
; mulq %rsi 5
; ----
; total 10
;
; The movq load from src seems to need to be scheduled back before the jz to
; achieve this speed, out-of-order execution apparently can't completely
; hide the latency otherwise.
;
; The l=src[i]-cbit step is rotated back too, since that allows us to avoid
; it for the first iteration (where there's no cbit).
;
; The code alignment used (32-byte) for the loop also seems necessary.
; Without that the non-PIC case has adcq crossing the 0x60 offset,
; apparently making it run at 11 cycles instead of 10.
;
; Not done:
;
; divq for size==1 was measured at about 79 cycles, compared to the inverse
; at about 25 cycles (both including function call overheads), so that's not
; used.
;
; Enhancements:
;
; For PIC, we shouldn't really need the GOT fetch for modlimb_invert_table,
; it'll be in rodata or text in libgmp.so and can be accessed directly %rip
; relative. This would be for small model only (something we don't
; presently detect, but which is all that gcc 3.3.3 supports), since 8-byte
; PC-relative relocations are apparently not available. Some rough
; experiments with binutils 2.13 looked worryingly like it might come out
; with an unwanted text segment relocation though, even with ".protected".
; mp_limb_t mpn_modexact_1_odd (
; mp_srcptr src, rcx
; mp_size_t size, rdx
; mp_limb_t divisor r8
; );
; mp_limb_t mpn_modexact_1c_odd (
; mp_srcptr src, rcx
; mp_size_t size, rdx
; mp_limb_t divisor, r8
; mp_limb_t carry r9
; );
bits 64
text
align 32
global __gmpn_modexact_1_odd
global __gmpn_modexact_1c_odd
extern __gmp_modlimb_invert_table
%ifdef DLL
export __gmpn_modexact_1_odd
export __gmpn_modexact_1c_odd
%endif
; __gmpn_modexact_1_odd  -- entry with an initial carry of zero.
; __gmpn_modexact_1c_odd -- entry with the initial carry supplied in r9.
;
; Inputs: rcx = src, rdx = size, r8 = divisor, r9 = carry (1c entry only).
; The divisor is inverted modulo 2^64 with Newton iterations seeded from
; __gmp_modlimb_invert_table; the main loop then performs one imul and one
; mul per source limb. The result is returned in rax.
; rsi and rdi are saved and restored.
__gmpn_modexact_1_odd:
mov r9d, 0 ; carry = 0, then fall through into the 1c entry
__gmpn_modexact_1c_odd:
push rsi
push rdi
mov rsi, rdx ; rsi = size
mov rdx, r8
shr edx, 1 ; div / 2
lea r10, [__gmp_modlimb_invert_table wrt rip]
and edx, 127 ; table index = (divisor / 2) mod 128
movzx edx, byte [rdx+r10] ; inv -> rdx (8-bit approx)
mov rax, [rcx] ; rax = src[0]
lea r11, [rcx+rsi*8] ; pointer to top of src
mov rdi, r8 ; save divisor
; Newton steps: inv = 2*inv - inv*inv*divisor, doubling the precision.
lea ecx, [rdx+rdx]
imul edx, edx
neg rsi ; limb offset from top of src
imul edx, edi
sub ecx, edx ; inv -> rcx (16-bit approx)
lea edx, [rcx+rcx]
imul ecx, ecx
imul ecx, edi
sub edx, ecx ; inv -> rdx (32-bit approx)
xor ecx, ecx ; rcx = carry bit, initially 0
lea r10, [rdx+rdx]
imul rdx, rdx
imul rdx, r8
sub r10, rdx ; inv -> r10 (64-bit approx)
mov rdx, r9 ; initial carry -> rdx
inc rsi ; adjust limb offset
jz .1 ; size == 1: only the final step is needed
align 16
; Main loop. rax = current limb minus the previous carry bit, rcx = carry
; bit, rdx = carry limb (high limb of the previous product).
.0: sub rax, rdx ; subtract the carry limb
adc rcx, 0 ; accumulate the borrow into the carry bit
imul rax, r10 ; q = value * inverse
mul r8 ; rdx = high limb of q * divisor (next carry limb)
mov rax, [r11+8*rsi] ; next source limb
sub rax, rcx ; minus the carry bit
setc cl ; new carry bit
inc rsi
jnz .0
; Final limb: same step, then combine carry bit and carry limb.
.1: sub rax, rdx
adc rcx, 0
imul rax, r10
mul r8
lea rax, [rcx+rdx] ; return carry bit + carry limb
pop rdi
pop rsi
ret
end

View file

@ -1,209 +0,0 @@
;
; AMD64 mpn_mul_1 -- mpn by limb multiply.
;
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
;
; Calling interface:
; mp_limb_t mpn_mul_1 (
; mp_ptr dst, rcx
; mp_srcptr src, rdx
; mp_size_t size, r8
; mp_limb_t multiplier r9
; )
;
; mp_limb_t mpn_mul_1c (
; mp_ptr dst, rcx
; mp_srcptr src, rdx
; mp_size_t size, r8
; mp_limb_t multiplier, r9
; mp_limb_t carry [rsp+0x28]
; )
;
; Multiply src[size] by mult and store the result in dst[size]. Return the
; carry limb from the top of the result.
;
; mpn_mul_1c() accepts an initial carry for the calculation, it's added into
; the low limb of the destination.
; Maximum possible UNROLL_COUNT with the current code is 16: UNROLL_BYTES
; (8 * UNROLL_COUNT) must stay below 256 and the count is 1 << UNROLL_LOG2.
; The "%if 1" branch below is the unrolled implementation that gets
; assembled; the "%else" branch holds a simple loop-only alternative.
%if 1
%define dst rcx
%define len r8
%define mlt r9
%define cry [rsp+0x28]
%define src r10 ; from rdx on input
%define cnt rbx ; loop count
%define UNROLL_LOG2 4
%define UNROLL_COUNT (1 << UNROLL_LOG2)
%define UNROLL_MASK (UNROLL_COUNT - 1)
%define UNROLL_BYTES 8 * UNROLL_COUNT
; `off` biases the src/dst pointers so that every displacement used inside
; the unrolled code fits in a signed byte.
%if UNROLL_BYTES >= 256
%error unroll count is too large
%elif UNROLL_BYTES >= 128
%define off 128
%else
%define off 0
%endif
%define UNROLL_THRESHOLD 7
bits 64
text
global __gmpn_mul_1
global __gmpn_mul_1c
%ifdef DLL
export __gmpn_mul_1
export __gmpn_mul_1c
%define PIC
%endif
; __gmpn_mul_1c -- as __gmpn_mul_1, with the initial carry limb read from the
; stack slot at [rsp+0x28].
__gmpn_mul_1c:
mov r11,[rsp+0x28]
jmp start
; __gmpn_mul_1 -- dst[i] = low limb of (src[i] * multiplier + carry) for
; i = 0 .. size-1; the final carry limb is returned in rax.
; r11 holds the running carry limb throughout.
__gmpn_mul_1:
xor r11,r11 ; initial carry = 0
start:
movsxd len,r8d ; only the low 32 bits of size are used (sign-extended)
mov src,rdx ; free rdx for the mul results
cmp len,UNROLL_THRESHOLD
jae .1
; Simple loop for short operands: pointers are moved one past the end and
; len runs from -size up to zero.
lea src,[src+len*8]
lea dst,[dst+len*8]
neg len
.0: mov rax,[src+len*8]
mul mlt
add rax,r11 ; low limb + incoming carry
mov r11,dword 0 ; mov does not alter the carry flag
adc r11,rdx ; new carry = high limb + carry out
mov [dst+len*8],rax
inc len
jnz .0
mov rax,r11
ret
; The mov to load the next source limb is done well ahead of the mul, this
; is necessary for full speed. It leads to one limb handled separately
; after the loop.
;
; When unrolling to 32 or more, an offset of +4 is used on the src pointer,
; to avoid having an 0x80 displacement in the code for the last limb in the
; unrolled loop. This is for a fair comparison between 16 and 32 unrolling.
;
; NOTE(review): the paragraph above looks inherited from a 32-bit version;
; in this file the bias is `off` (128 bytes), applied once UNROLL_BYTES
; reaches 128 -- confirm and reword.
.1:
push rbx ; cnt lives in rbx
lea cnt,[len-2]
lea len,[len-1]
neg len
shr cnt,UNROLL_LOG2 ; cnt = (size-2) >> UNROLL_LOG2, counted down past zero
and len,UNROLL_MASK ; len = (-(size-1)) mod UNROLL_COUNT = blocks to skip
; Computed entry point: rdx = .3 + 20 * len (16*len + 4*len), because each
; unrolled block below must assemble to exactly 20 bytes.
mov rdx,len
shl rdx,4
%ifdef PIC
lea rax,[.3 wrt rip]
lea rdx,[rdx+len*4]
lea rdx,[rdx+rax]
%else
lea rdx,[rdx+len*4+.3]
%endif
mov rax,[src] ; first source limb, loaded ahead of its mul
neg len
lea src,[src+len*8+off] ; pre-adjust pointers for the skipped blocks
lea dst,[dst+len*8+off]
xor len,len ; len now zero
jmp rdx
.3:
%assign i 0
%rep UNROLL_COUNT
%define disp 8 * i - off
mul mlt ; 20 bytes per block
add r11,rax ; low limb + carry
mov rax,[byte src+disp+8] ; load the next source limb early
mov [byte dst+disp],r11
mov r11,len ; r11 = 0 without touching the carry flag
adc r11,rdx ; new carry = high limb + carry out
%assign i i + 1
%endrep
dec cnt
lea src,[src+UNROLL_BYTES]
lea dst,[dst+UNROLL_BYTES]
jns .3
; Last limb, handled outside the loop (its source was already loaded).
mul mlt
add r11,rax
mov rax,len ; rax = 0 without touching the carry flag
mov [dst-off],r11
adc rax,rdx ; return value = final carry limb
pop rbx
ret
%else
; Alternative implementation without unrolling (not assembled while the
; condition above is "%if 1").
bits 64
text
global __gmpn_mul_1
global __gmpn_mul_1c
%ifdef DLL
export __gmpn_mul_1
export __gmpn_mul_1c
%endif
__gmpn_mul_1c:
mov r11, [rsp+0x28] ; initial carry from the stack
jmp start
align 16
nop
nop
__gmpn_mul_1:
xor r11, r11 ; initial carry = 0
start:
lea r10, [rdx+8*r8] ; r10 -> one past the end of src
lea rcx, [rcx+8*r8] ; rcx -> one past the end of dst
neg r8
.1: mov rax, [r10+8*r8]
mul r9
add rax, r11
mov r11d, 0 ; mov does not alter the carry flag
adc r11, rdx
mov [rcx+8*r8], rax
inc r8
jne .1
mov rax, r11 ; return the final carry limb
ret
%endif
end

View file

@ -1,277 +0,0 @@
;
; AMD64 mpn_mul_basecase -- multiply two mpn numbers.
;
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
;
; Calling interface:
;
; void __gmpn_mul_basecase(
; mp_ptr rp, rcx
; mp_srcptr xp, rdx
; mp_size_t xn, r8
; mp_srcptr yp, r9
; mp_size_t yn [rsp+0x28] as a *** 32-bit *** word
; )
;
; Multiply xp[xn] by yp[yn] and write the result to rp[xn+yn], with xn >= yn
; on entry.
; amd64i.inc is not shown here; it presumably supplies the f_push, f_pop,
; f_add and f_sub macros and the x86_regs register list used below -- confirm.
%include "amd64i.inc"
%define UNROLL_LOG2 4
%define UNROLL_COUNT (1 << UNROLL_LOG2)
%define UNROLL_MASK (UNROLL_COUNT - 1)
%define UNROLL_BYTES (8 * UNROLL_COUNT)
%define UNROLL_THRESHOLD 5
bits 64
text
; Stack slots used by the unrolled path (after "f_add v_len") and register
; aliases. v_ctr and v_jmp reuse the x_len and x_ptr registers.
%define v_par rsp + 16
%define v_adj rsp + 8
%define v_xlo rsp
%define v_len 24
%define r_ptr rcx
%define x_ptr r11
%define x_len r8
%define y_ptr r9
%define y_len r10
%define v_ctr r8 ; x_len reused
%define v_jmp r11 ; x_ptr reused
%define reg_list x86_regs,r12
global __gmpn_mul_basecase
%ifdef DLL
export __gmpn_mul_basecase
%define PIC
%endif
; __gmpn_mul_basecase -- schoolbook multiplication of x[x_len] by y[y_len],
; writing the product to r_ptr.
;
; Dispatch on x_len: 1 -> single mul; 2 -> mul_2_by_n (y_len 1 or 2);
; above 2 -> mul_m_by_n, which switches to the unrolled inner loop
; (L_unroll) once x_len >= UNROLL_THRESHOLD.
__gmpn_mul_basecase:
movsxd x_len,r8d ; only the low 32 bits of xn are used (sign-extended)
mov rax,[y_ptr] ; rax = y[0]
cmp x_len,2
ja mul_m_by_n
je mul_2_by_n
; x_len below 2: one limb by one limb.
mul qword [rdx]
mov [r_ptr],rax
mov [r_ptr+8],rdx
ret
mul_2_by_n:
movsxd r10,dword[rsp+0x28] ; load as a 32-bit integer
mov x_ptr,rdx
dec qword y_len
jnz mul_2_by_2
; 2 x 1 limbs.
mov r8,rax ; y[0] -> r8 (was x_len)
mov rax,[x_ptr]
mul r8
mov [r_ptr],rax
mov rax,[x_ptr+8]
mov r9,rdx ; carry -> r9 (was y_ptr)
mul r8
add r9,rax
mov [r_ptr+8],r9
adc rdx,y_len ; note: r10 = 0 (was y_len)
mov [r_ptr+16],rdx
ret
mul_2_by_2: ; r8 (x_len) and r10 (y_len) free
mov r10,[x_ptr] ; x[0]
mul r10 ; y[0] * x[0]
mov [r_ptr],rax
mov r8,rdx ; cry = { 0, r8 }
mov rax,[y_ptr+8] ; y[1]
mul r10 ; y[1] * x[0]
add r8,rax
adc rdx,byte 0
mov r10,[x_ptr+8] ; x[1] - r11 (x_ptr) now free
mov r11,rdx ; cry = { r11, r8 }
mov rax,[y_ptr] ; y[0]
mul r10 ; y[0] * x[1]
add r8,rax
adc r11,rdx
mov [r_ptr+8],r8
mov r8,dword 0
adc r8,r8 ; cry = { r8, r11 }
mov rax,[y_ptr+8] ; y[1]
mul r10 ; x[1] * y[1]
add rax,r11
adc rdx,r8
mov [r_ptr+16],rax
mov [r_ptr+24],rdx
ret
; do first multiply of y[0] * x[n] as it can simply be stored
mul_m_by_n:
; y_len is read before f_push, while [rsp+0x28] still addresses the caller's
; stack argument. This load zero-extends (mul_2_by_n sign-extends).
mov r10d,dword[rsp+0x28] ; load as a 32-bit integer
f_push reg_list
mov x_ptr,rdx
mov r12,x_len
mov rbp,rax ; y[0] -> rbp
xor rbx,rbx ; for carry
lea rsi,[x_ptr+r12*8] ; past end of x[]
lea rdi,[r_ptr+r12*8] ; past end of r[]
neg r12
.0: mov rax,[rsi+r12*8] ; x[n]
mul rbp ; x[n] * y[0]
add rax,rbx ; add carry from previous round
mov [rdi+r12*8],rax ; store r[n]
mov rbx,dword 0 ; propagate carry
adc rbx,rdx
inc r12 ; next iteration
jnz .0
mov [rdi],rbx ; store final digit in carry
mov rdx,y_len ; done if y_len is 1
dec rdx
jnz .1 ; more to do
f_pop reg_list
ret
.1: cmp x_len,UNROLL_THRESHOLD ; unroll if many loops
jae L_unroll
; Simple (non-unrolled) accumulation of x[] * y[n] into r[] for n >= 1.
lea y_ptr,[y_ptr+rdx*8+8] ; pointer to end limb of y[]
neg x_len ; negative counter for x[n]
neg rdx ; negative counter for y[n]
mov rax,[rsi+x_len*8] ; x[0] -> rax
mov y_len,rdx ; now -(y_len - 1)
inc x_len ; negative counter for x[1]
xor rbx,rbx ; for carry
mov rcx,x_len ; now -(x_len - 1) -> rcx (was r_ptr)
mov rbp,[y_ptr+rdx*8] ; y[n] -> rbp
jmp .3
.2: mov rcx,x_len ; restore x[] counter
xor rbx,rbx ; clear carry
add rdi,8 ; increase end of r[] pointer
mov rbp,[y_ptr+y_len*8] ; y[n] -> rbp
mov rax,[rsi+rcx*8-8] ; x[m] -> rax
.3: mul rbp ; x[m] * y[n]
add rbx,rax ; add carry
adc rdx,byte 0
add [rdi+rcx*8],rbx ; add into r[]
mov rax,[rsi+rcx*8] ; next x[m] ->rax
adc rdx,byte 0 ; add carry to rdx
inc rcx ; go to next limb of x[]
mov rbx,rdx ; move carry into rbx
jnz .3 ; loop until the x[] counter reaches zero
mul rbp ; do last limb
add rbx,rax ; propagate carry
adc rdx,byte 0
add [rdi],rbx ; add into r[]
adc rdx,byte 0 ; add in any carry
inc y_len
mov [rdi+8],rdx ; move (not add) carry into r[]
jnz .2 ; go to next limb of y[]
f_pop reg_list
ret
; Unrolled accumulation for x_len >= UNROLL_THRESHOLD. The inner loop is
; entered part-way through via a computed jump so that x_len need not be a
; multiple of UNROLL_COUNT.
L_unroll:
f_add v_len ; presumably reserves v_len bytes for v_par/v_adj/v_xlo -- confirm
mov rdi,r_ptr
mov rcx,x_len
mov rsi,x_ptr
mov rbp,[y_ptr+8] ; y[1]
lea y_ptr,[y_ptr+rdx*8+8] ; pointer to end of y[]
neg rdx
mov y_len,rdx ; now -(y_len - 1)
lea rbx,[UNROLL_COUNT-2+rcx]
dec rcx
mov rax,[rsi] ; x[0]
and rbx,-UNROLL_MASK-1
neg rcx
neg rbx
and rcx,UNROLL_MASK ; rcx = (-(x_len-1)) mod UNROLL_COUNT = limbs to skip
mov [v_par],rcx ; saved for the parity test on every outer iteration
mov [v_adj],rbx ; saved pointer/counter adjustment
mov rdx,rcx
; Entry address = .4 + 24 * (limbs to skip): the unrolled code is evidently
; required to assemble to 24 bytes per limb.
shl rcx,3
%ifdef PIC
lea rcx,[rcx+rcx*2]
lea v_jmp,[.4 wrt rip]
lea v_jmp,[v_jmp+rcx]
%else
lea v_jmp,[rcx+rcx*2+.4]
%endif
neg rdx
mov [v_xlo],rax ; x[0] saved for the later outer iterations
lea rdi,[rdi+rdx*8+8]
lea rsi,[rsi+rdx*8+8]
jmp .3
; Outer loop: restore the pointers and fetch the next y limb.
.2: mov rbx,[v_adj]
mov rax,[v_xlo]
lea rdi,[rdi+rbx*8+8]
lea rsi,[rsi+rbx*8]
mov rbp,[y_ptr+y_len*8]
.3: mul rbp
sar rbx,UNROLL_LOG2
mov rcx,[v_par]
mov v_ctr,rbx ; negative count of unrolled passes
test cl,1 ; low word of product + carry
mov rbx,dword 0 ; is in rcx on even rounds and
mov rcx,dword 0 ; rbx on odd rounds - we must
cmovz rcx,rax ; put low word of first product
cmovnz rbx,rax ; in the right register here
jmp v_jmp
.4:
%define CHUNK_COUNT 2
%assign i 0
%rep UNROLL_COUNT / CHUNK_COUNT
; One chunk = two limbs; rbx and rcx alternate as the pending low word.
%define disp0 8 * i * CHUNK_COUNT
mov rax,[byte rsi+disp0]
adc rbx,rdx
mul rbp
add [byte rdi+disp0],rcx
mov rcx,dword 0
adc rbx,rax
mov rax,[byte rsi+disp0+8]
adc rcx,rdx
mul rbp
add [byte rdi+disp0+8],rbx
mov rbx,dword 0
adc rcx,rax
%assign i i + 1
%endrep
inc v_ctr
lea rsi,[UNROLL_BYTES+rsi]
lea rdi,[UNROLL_BYTES+rdi]
jnz .4
adc rdx,byte 0
add [rdi],rcx
adc rdx,byte 0
inc y_len
mov [rdi+8],rdx ; store (not add) the top carry limb
jnz .2
f_sub v_len ; presumably releases the stack area reserved by f_add -- confirm
f_pop reg_list
ret
end

View file

@ -1,87 +0,0 @@
; AMD64 mpn_rshift -- mpn right shift
;
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
;
; Calling interface:
;
; mp_limb_t mpn_rshift(
; mp_ptr dst, rcx
; mp_srcptr src, rdx
; mp_size_t size, r8
; unsigned shift r9
; )
; Register aliases. r_tmp shares r9 with the incoming shift count, so the
; count is moved to rcx/cl before r_tmp is first written.
%define s_len r8
%define r_tmp r9
%define d_ptr r10
%define s_ptr r11
bits 64
text
global __gmpn_rshift
%ifdef DLL
export __gmpn_rshift
%endif
; __gmpn_rshift -- shift the size-limb vector at src right by `shift` bits,
; store the result at dst, and return in rax the bits shifted out of the
; least significant limb (left-justified).
;
; Limbs are processed from the least significant up. The complementary
; count (64 - shift) is obtained by negating cl, relying on the hardware
; masking of 64-bit shift counts to 6 bits.
; NOTE(review): a shift of 0 would make both counts 0; presumably callers
; guarantee 1 <= shift <= 63 -- confirm against the mpn_rshift contract.
; If size is 0 the function returns immediately without setting rax.
__gmpn_rshift:
movsxd s_len,r8d ; only the low 32 bits of size are used (sign-extended)
or s_len,s_len
jz .0
mov d_ptr,rcx
mov s_ptr,rdx
mov rcx,r9 ; cl = shift count
cmp s_len,byte 2
jge .1
; Single limb.
mov rax,[s_ptr]
mov r_tmp,rax
shr r_tmp,cl
neg cl ; cl = 64 - shift (mod 64)
mov [d_ptr],r_tmp
shl rax,cl ; return value: bits shifted out
.0: ret
; Two or more limbs: pointers are moved one past the end and s_len runs
; from -size up to zero. The original low limb is pushed so that the return
; value can be derived from it after the loop.
.1: lea s_ptr,[s_ptr+s_len*8]
lea d_ptr,[d_ptr+s_len*8]
neg s_len
mov rdx,[s_ptr+s_len*8] ; rdx = least significant limb
push rdx
shr rdx,cl ; rdx = limb 0 >> shift
neg cl ; cl = 64 - shift
inc s_len
; Loop invariant on entry: rdx = previous limb >> shift, cl = 64 - shift.
.2: mov rax,[s_ptr+s_len*8]
mov r_tmp,rax
shl r_tmp,cl ; low bits of this limb, moved to the top
neg cl ; cl = shift
xor r_tmp,rdx ; combine; the two bit ranges do not overlap
shr rax,cl
neg cl ; cl = 64 - shift again
mov rdx,rax
mov [d_ptr+s_len*8-8],r_tmp
inc s_len
jnz .2
mov [d_ptr-8],rax ; most significant limb takes in zero bits
pop rax ; original least significant limb
shl rax,cl ; return value: bits shifted out
ret

View file

@ -1,316 +0,0 @@
;
; AMD64 mpn_sqr_basecase -- square an mpn number.
;
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
;
; Calling interface:
;
; void mpn_sqr_basecase(
; mp_ptr dst, rcx
; mp_srcptr src, rdx
; mp_size_t size r8
; )
;
; amd64i.inc is not shown here; it presumably supplies the f_push and f_pop
; macros and the x86_regs register list used below -- confirm.
%include "amd64i.inc"
%define UNROLL_COUNT 31
%define CODE_BYTES_PER_LIMB 25 ; must be odd
; `off` biases rsi/rdi so that the displacements in the unrolled code fit in
; a signed byte (checked by the %error test inside the %rep block).
%if UNROLL_COUNT > 15
%define off (UNROLL_COUNT - 15) * 8
%else
%define off 0
%endif
%define reg_list_2 x86_regs,r12,r13
%define r_ptr r10
%define x_ptr r9
%define x_len r8
%define v_ctr r12
%define v_jmp r13
bits 64
text
global __gmpn_sqr_basecase
%ifdef DLL
export __gmpn_sqr_basecase
%define PIC
%endif
; __gmpn_sqr_basecase -- square the size-limb number at src (rdx) and write
; the 2*size-limb result to dst (rcx).
;
; Dispatch on size: below 2 -> single mul; 2 -> sqr_2; 3 -> straight-line
; code in sqr_3_plus; 4 or more -> sqr_4_plus, which builds the off-diagonal
; products (unrolled, entered by a computed jump), doubles them with a
; one-bit left shift, then adds the diagonal squares.
__gmpn_sqr_basecase:
movsxd x_len,r8d ; only the low 32 bits of size are used (sign-extended)
cmp x_len,2
je sqr_2
ja sqr_3_plus
; size below 2: one limb squared.
mov rax,[rdx]
mul rax
mov [rcx+8],rdx
mov [rcx],rax
ret
; Two limbs: x0^2 and x1^2 are stored, then 2*x0*x1 is added at limb 1.
sqr_2:
mov r_ptr,rcx
mov x_ptr,rdx
mov r8,[x_ptr] ; x[0]
mov r9,[x_ptr+8] ; x[1] (overwrites x_ptr, no longer needed)
mov rax,r8
mul r8
mov [r_ptr],rax
mov [r_ptr+8],rdx
mov rax,r9
mul r9
mov [r_ptr+16],rax
mov [r_ptr+24],rdx
xor rcx,rcx
mov rax,r8
mul r9 ; x[0] * x[1]
add rax,rax ; double the cross product into rcx:rdx:rax
adc rdx,rdx
adc rcx,rcx
add [r_ptr+8],rax
adc [r_ptr+16],rdx
adc [r_ptr+24],rcx
ret
sqr_3_plus:
mov r_ptr,rcx
mov x_ptr,rdx
cmp x_len,4
jae sqr_4_plus
; Three limbs. First store the three diagonal squares in r[0..5].
f_push x86_regs
mov rax,[x_ptr]
mul rax
mov [r_ptr],rax
mov rax,[x_ptr+8]
mov [r_ptr+8],rdx
mul rax
mov [r_ptr+16],rax
mov rax,[x_ptr+16]
mov [r_ptr+24],rdx
mul rax
mov [r_ptr+32],rax
mov rax,[x_ptr]
mov [r_ptr+40],rdx
; Cross products x0*x1, x0*x2 and x1*x2 accumulate in rdx:rbp:rdi:rsi.
mul qword [x_ptr+8]
mov rsi,rax
mov rax,[x_ptr]
mov rdi,rdx
mul qword [x_ptr+16]
add rdi,rax
mov rbp,dword 0
mov rax,[x_ptr+8]
adc rbp,rdx
mul qword [x_ptr+16]
xor x_ptr,x_ptr ; x_ptr becomes the top carry limb
add rbp,rax
adc rdx,dword 0
; NOTE(review): this second adc can only add the carry out of the previous
; adc, which appears to be always zero -- looks redundant, confirm.
adc rdx,dword 0
; Double the cross products and add them to r[1..5].
add rsi,rsi
adc rdi,rdi
mov rax,[r_ptr+8]
adc rbp,rbp
adc rdx,rdx
adc x_ptr,dword 0
add rsi,rax
mov rax,[r_ptr+16]
adc rdi,rax
mov rax,[r_ptr+24]
mov [r_ptr+8],rsi
adc rbp,rax
mov rax,[r_ptr+32]
mov [r_ptr+16],rdi
adc rdx,rax
mov rax,[r_ptr+40]
mov [r_ptr+24],rbp
adc rax,x_ptr
mov [r_ptr+32],rdx
mov [r_ptr+40],rax
f_pop x86_regs
ret
sqr_4_plus:
f_push reg_list_2
; First row: r[1 .. size] = x[0] * x[1 .. size-1].
mov rcx,x_len
lea rdi,[r_ptr+rcx*8]
lea rsi,[x_ptr+rcx*8]
mov rbp,[x_ptr] ; x[0]
mov rbx,dword 0 ; carry
dec rcx
neg rcx ; rcx = -(size-1)
.0: mov rax,[rsi+rcx*8]
mul rbp
add rax,rbx
mov [rdi+rcx*8],rax
mov rbx,dword 0
adc rbx,rdx
inc rcx
jnz .0
mov rcx,x_len
mov [rdi],rbx
sub rcx,4
jz L_corner ; size == 4: only the corner products remain
neg rcx
%if off != 0
sub rdi,off
sub rsi,off
%endif
; Computed entry into the unrolled code below. With rcx = rdx = -(size-4):
; 24*rcx + rdx = 25 * -(size-4), i.e. CODE_BYTES_PER_LIMB bytes per limb.
mov rdx,rcx
shl rcx,3
lea rcx,[rcx+rcx*2]
%ifdef PIC
lea v_jmp,[.3 wrt rip]
lea rcx,[rcx+rdx+(UNROLL_COUNT - 2) * CODE_BYTES_PER_LIMB]
lea rcx,[rcx+v_jmp]
%else
lea rcx,[rcx+rdx+(UNROLL_COUNT - 2) * CODE_BYTES_PER_LIMB+.3]
%endif
; Outer loop: one row of off-diagonal products per pass. Each pass enters
; the unrolled code one limb later (v_jmp = rcx + CODE_BYTES_PER_LIMB).
.2: lea v_jmp,[rcx+CODE_BYTES_PER_LIMB]
mov rbp,[rsi+rdx*8-24+off] ; multiplier limb for this row
mov rax,[rsi+rdx*8-16+off] ; first multiplicand limb
mov v_ctr,rdx
mul rbp
; The low bit of the entry address selects which of rbx/rcx must hold the
; low word of the first product (CODE_BYTES_PER_LIMB is odd).
test cl,1
mov rbx,rdx
mov rcx,rax
%if (UNROLL_COUNT % 2)
cmovnz rbx,rax
cmovnz rcx,rdx
%else
cmovz rbx,rax
cmovz rcx,rdx
%endif
xor rdx,rdx ; also clears the carry flag
lea rdi,[rdi+8]
jmp v_jmp
align 2
.3:
%assign i UNROLL_COUNT
%rep UNROLL_COUNT
%define disp_src off - 8 * i
%if disp_src < -120 || disp_src >= 128
%error source dispacement too large
%endif
%if (i % 2) = 0 ; 25 bytes of code per limb
nop
mov rax,[byte rsi + disp_src]
adc rbx,rdx
mul rbp
add [byte rdi + disp_src - 8],rcx
mov rcx,dword 0
adc rbx,rax
%else
nop
mov rax,[byte rsi + disp_src]
adc rcx,rdx
mul rbp
add [byte rdi + disp_src - 8],rbx
%if i != 1
mov rbx,dword 0
%endif
adc rcx,rax
%endif
%assign i i - 1
%endrep
adc rdx,dword 0
add [rdi-8+off],rcx
mov rcx,v_jmp ; next pass starts from this pass's entry address
adc rdx,dword 0
mov [rdi+off],rdx
mov rdx,v_ctr
inc rdx
jnz .2
%if off != 0
add rsi,off
add rdi,off
%endif
; Corner: the last three cross products, among the top three source limbs.
L_corner:
mov rbp,[rsi-24]
mov rax,[rsi-16]
mov rcx,rax
mul rbp
add [rdi-8],rax
mov rax,[rsi-8]
adc rdx,dword 0
mov rbx,rdx
mov rsi,rax
mul rbp
add rax,rbx
adc rdx,dword 0
add [rdi],rax
mov rax,rsi
adc rdx,dword 0
mov rbx,rdx
mul rcx
add rax,rbx
mov [rdi+8],rax
adc rdx,dword 0
mov [rdi+16],rdx
; Double the accumulated cross products: shift r[1 .. 2*size-2] left by one
; bit, two limbs per iteration, with the carry chained through rcl.
mov rax,x_len ; start of shift
mov rdi,r_ptr
xor rcx,rcx ; clears the carry flag for the first rcl
lea r11,[rax+rax]
lea rdi,[rdi+r11*8] ; rdi -> one past the end of r[]
not rax
lea rax,[rax+2] ; rax = -(size-1); lea keeps the carry flag clear
.0: lea r11,[rax+rax]
rcl qword [rdi+r11*8-8],1
rcl qword [rdi+r11*8],1
inc rax ; inc does not alter the carry flag
jnz .0
setc al ; rax is zero here, so rax = bit shifted out
mov rsi,x_ptr
mov [rdi-8],rax ; top limb of r[]
; Add the diagonal squares x[i]^2 at limb positions 2*i.
mov rcx,x_len
mov rax,[rsi]
mul rax
lea rsi,[rsi+rcx*8]
neg rcx
lea r11,[rcx+rcx]
mov [rdi+r11*8],rax ; r[0] = low limb of x[0]^2
inc rcx
.1: lea r11,[rcx+rcx]
mov rax,[rsi+rcx*8]
mov rbx,rdx ; high limb of the previous square
mul rax
add [rdi+r11*8-8],rbx
adc [rdi+r11*8],rax
adc rdx,dword 0
inc rcx
jnz .1
add [rdi-8],rdx
f_pop reg_list_2
ret
end

View file

@ -1,160 +0,0 @@
/* mpn_addsub_n -- Add and Subtract two limb vectors of equal, non-zero length.
Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "gmp.h"
#include "gmp-impl.h"
#ifndef L1_CACHE_SIZE
#define L1_CACHE_SIZE 8192 /* only 68040 has less than this */
#endif
#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
/* mpn_addsub_n.
r1[] = s1[] + s2[]
r2[] = s1[] - s2[]
All operands have n limbs.
In-place operations allowed. */
/* Add LEN limbs of AP and BP into RP with carry-in CY and return the
   carry-out.  Uses mpn_add_nc when it is natively available; otherwise
   mpn_add_n followed by mpn_add_1 to fold in the incoming carry.  */
static mp_limb_t
addsub_add_part (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t len,
                 mp_limb_t cy)
{
#if HAVE_NATIVE_mpn_add_nc
  return mpn_add_nc (rp, ap, bp, len, cy);
#else
  mp_limb_t part_cy = mpn_add_n (rp, ap, bp, len);
  return part_cy + mpn_add_1 (rp, rp, len, cy);
#endif
}

/* Subtract LEN limbs of BP from AP into RP with borrow-in CY and return the
   borrow-out.  Uses mpn_sub_nc when it is natively available; otherwise
   mpn_sub_n followed by mpn_sub_1 to fold in the incoming borrow.  */
static mp_limb_t
addsub_sub_part (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t len,
                 mp_limb_t cy)
{
#if HAVE_NATIVE_mpn_sub_nc
  return mpn_sub_nc (rp, ap, bp, len, cy);
#else
  mp_limb_t part_cy = mpn_sub_n (rp, ap, bp, len);
  return part_cy + mpn_sub_1 (rp, rp, len, cy);
#endif
}

/* Compute r1[] = s1[] + s2[] and r2[] = s1[] - s2[] over N limbs and return
   2 * (carry of the addition) + (borrow of the subtraction).

   The work is done in chunks of at most PART_SIZE limbs, alternating between
   addition and subtraction, so that each chunk stays in the (L1) cache.  The
   order of the two operations within a chunk, and whether a temporary buffer
   is needed, depends on which result operands coincide with the inputs.  */
mp_limb_t
mpn_addsub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
{
  mp_limb_t add_cy = 0;		/* running carry of the addition */
  mp_limb_t sub_cy = 0;		/* running borrow of the subtraction */
  mp_size_t pos = 0;		/* offset of the current chunk */
  mp_size_t len;		/* limbs in the current chunk */

  if (r1p != s1p && r1p != s2p)
    {
      /* r1 is distinct from both inputs: the sum can be written first
         without destroying anything the subtraction still has to read.  */
      while (pos < n)
	{
	  len = MIN (n - pos, PART_SIZE);
	  add_cy = addsub_add_part (r1p + pos, s1p + pos, s2p + pos, len, add_cy);
	  sub_cy = addsub_sub_part (r2p + pos, s1p + pos, s2p + pos, len, sub_cy);
	  pos += PART_SIZE;
	}
      return 2 * add_cy + sub_cy;
    }

  if (r2p != s1p && r2p != s2p)
    {
      /* r2 is distinct from both inputs: write the difference first, then
         the sum.  */
      while (pos < n)
	{
	  len = MIN (n - pos, PART_SIZE);
	  sub_cy = addsub_sub_part (r2p + pos, s1p + pos, s2p + pos, len, sub_cy);
	  add_cy = addsub_add_part (r1p + pos, s1p + pos, s2p + pos, len, add_cy);
	  pos += PART_SIZE;
	}
      return 2 * add_cy + sub_cy;
    }

  {
    /* Both results coincide with inputs (r1==s1 and r2==s2, or vice versa):
       build the sum in a temporary, write the difference, then copy the sum
       into place.  */
    mp_limb_t tmp[PART_SIZE];

    while (pos < n)
      {
	len = MIN (n - pos, PART_SIZE);
	add_cy = addsub_add_part (tmp, s1p + pos, s2p + pos, len, add_cy);
	sub_cy = addsub_sub_part (r2p + pos, s1p + pos, s2p + pos, len, sub_cy);
	MPN_COPY (r1p + pos, tmp, len);
	pos += PART_SIZE;
      }
  }
  return 2 * add_cy + sub_cy;
}
#ifdef MAIN
#include <stdlib.h>
#include <stdio.h>
#include "timing.h"
long cputime ();

/* Timing driver, built only when MAIN is defined.

   Usage: prog LIMBS

   Times separate mpn_add_n + mpn_sub_n calls against mpn_addsub_n for each
   operand-overlap pattern that mpn_addsub_n distinguishes, and prints one
   line per measurement.  Returns 0 on success, 1 on a usage or allocation
   error.  */
int
main (int argc, char **argv)
{
  mp_ptr r1p, r2p, s1p, s2p;
  double t;
  mp_size_t n;

  /* argv[1] is mandatory; without this check a missing argument hands a
     null pointer to strtol.  */
  if (argc < 2)
    {
      fprintf (stderr, "usage: %s limb-count\n",
	       argc > 0 ? argv[0] : "addsub_n");
      return 1;
    }

  n = strtol (argv[1], 0, 0);
  if (n <= 0)
    {
      fprintf (stderr, "limb count must be positive\n");
      return 1;
    }

  r1p = malloc (n * BYTES_PER_MP_LIMB);
  r2p = malloc (n * BYTES_PER_MP_LIMB);
  s1p = malloc (n * BYTES_PER_MP_LIMB);
  s2p = malloc (n * BYTES_PER_MP_LIMB);
  if (r1p == NULL || r2p == NULL || s1p == NULL || s2p == NULL)
    {
      fprintf (stderr, "out of memory\n");
      free (r1p);
      free (r2p);
      free (s1p);
      free (s2p);
      return 1;
    }

  TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
  printf (" separate add and sub: %.3f\n", t);

  TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n));
  printf ("combined addsub separate variables: %.3f\n", t);

  /* r1 is also the first source operand.  */
  TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
  printf (" combined addsub r1 overlap: %.3f\n", t);

  /* r2 is also the second source operand.  (This measurement used to repeat
     the r1-overlap call, so the r2 case was never actually timed.)  */
  TIME (t,mpn_addsub_n(r1p,r2p,s1p,r2p,n));
  printf (" combined addsub r2 overlap: %.3f\n", t);

  /* Both results overwrite the sources.  */
  TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n));
  printf (" combined addsub in-place: %.3f\n", t);

  free (r1p);
  free (r2p);
  free (s1p);
  free (s2p);
  return 0;
}
#endif

View file

@ -1,247 +0,0 @@
/* mpn_divrem_1 -- mpn by limb division.
Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002, 2003 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
meaning the quotient size where that should happen, the quotient size
being how many udiv divisions will be done.
The default is to use preinv always, CPUs where this doesn't suit have
tuned thresholds. Note in particular that preinv should certainly be
used if that's the only division available (USE_PREINV_ALWAYS). */
#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD 0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD 0
#endif
/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
and UNNORM thresholds are 0 and only the inversion code is included.
If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
will be MP_SIZE_T_MAX and only the plain division code is included.
Otherwise mul-by-inverse is better than plain division above some
threshold, and best results are obtained by having code for both present.
The main reason for separating the norm and unnorm cases is that not all
CPUs give zero for "n0 >> BITS_PER_MP_LIMB" which would arise in the
unnorm code used on an already normalized divisor.
If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
non-shifting code for both the norm and unnorm cases, though with
different criteria for skipping a division, and with different thresholds
of course. And in fact if inversion is never viable, then that simple
non-shifting division would be all that's left.
The NORM and UNNORM thresholds might not differ much, but if there's
going to be separate code for norm and unnorm then it makes sense to have
separate thresholds. One thing that's possible is that the
mul-by-inverse might be better only for normalized divisors, due to that
case not needing variable bit shifts.
Notice that the thresholds are tested after the decision to possibly skip
one divide step, so they're based on the actual number of divisions done.
For the unnorm case, it would be possible to call mpn_lshift to adjust
the dividend all in one go (into the quotient space say), rather than
limb-by-limb in the loop. This might help if mpn_lshift is a lot faster
than what the compiler can generate for EXTRACT. But this is left to CPU
specific implementations to consider, especially since EXTRACT isn't on
the dependent chain. */
/* Divide the un-limb number at up, extended below by qxn zero limbs, by the
   single limb d.
   qp   receives the un+qxn quotient limbs; they are stored from the most
   significant limb (qp[un+qxn-1]) downwards.
   qxn  number of extra low ("fraction") quotient limbs to develop; each is
   produced by dividing with a zero low limb.
   up   dividend limbs, un of them.
   d    divisor, asserted non-zero.
   Returns the remainder.  When un+qxn is zero nothing is stored and 0 is
   returned.  */
mp_limb_t
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
mp_srcptr up, mp_size_t un, mp_limb_t d)
{
mp_size_t n;
mp_size_t i;
mp_limb_t n1, n0;
mp_limb_t r = 0;
ASSERT (qxn >= 0);
ASSERT (un >= 0);
ASSERT (d != 0);
/* FIXME: What's the correct overlap rule when qxn!=0? */
ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
/* n counts the quotient limbs still to be produced. */
n = un + qxn;
if (n == 0)
return 0;
/* Work with the divisor shifted up by the nail bits; the dividend limbs
and the remainder are shifted the same way around every division. */
d <<= GMP_NAIL_BITS;
qp += (n - 1); /* Make qp point at most significant quotient limb */
if ((d & GMP_LIMB_HIGHBIT) != 0)
{
/* Divisor has its high bit set (already normalized). */
if (un != 0)
{
/* High quotient limb is 0 or 1, skip a divide step. */
mp_limb_t q;
r = up[un - 1] << GMP_NAIL_BITS;
q = (r >= d);
*qp-- = q;
/* -q is all ones when q is 1 and zero otherwise, so d is subtracted
exactly when the quotient limb is 1. */
r -= (d & -q);
r >>= GMP_NAIL_BITS;
n--;
un--;
}
if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
{
/* Plain udiv_qrnnd division without shifting.  This label is also the
target of the goto in the unnormalized branch further down. */
plain:
/* Integer part: one division per remaining dividend limb. */
for (i = un - 1; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
udiv_qrnnd (*qp, r, r, n0, d);
r >>= GMP_NAIL_BITS;
qp--;
}
/* Fraction part: keep dividing with zero low limbs. */
for (i = qxn - 1; i >= 0; i--)
{
udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
r >>= GMP_NAIL_BITS;
qp--;
}
return r;
}
else
{
/* Multiply-by-inverse, divisor already normalized. */
mp_limb_t dinv;
invert_limb (dinv, d);
for (i = un - 1; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
r >>= GMP_NAIL_BITS;
qp--;
}
for (i = qxn - 1; i >= 0; i--)
{
udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
r >>= GMP_NAIL_BITS;
qp--;
}
return r;
}
}
else
{
/* Most significant bit of divisor == 0. */
int norm;
/* Skip a division if high < divisor (high quotient 0). Testing here
before normalizing will still skip as often as possible. */
if (un != 0)
{
n1 = up[un - 1] << GMP_NAIL_BITS;
if (n1 < d)
{
r = n1 >> GMP_NAIL_BITS;
*qp-- = 0;
n--;
if (n == 0)
return r;
un--;
}
}
/* If the division primitive does not need a normalized divisor and the
size is below the threshold, reuse the non-shifting loops above. */
if (! UDIV_NEEDS_NORMALIZATION
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
goto plain;
/* Normalize: shift divisor and current remainder left by the number of
leading zero bits of d.  The dividend limbs are shifted on the fly in
the loops below, and the final remainder is shifted back. */
count_leading_zeros (norm, d);
d <<= norm;
r <<= norm;
if (UDIV_NEEDS_NORMALIZATION
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
{
/* Plain division on the shifted operands. */
if (un != 0)
{
n1 = up[un - 1] << GMP_NAIL_BITS;
/* Bits shifted out of the top dividend limb go into the remainder. */
r |= (n1 >> (GMP_LIMB_BITS - norm));
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
udiv_qrnnd (*qp, r, r,
(n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
d);
r >>= GMP_NAIL_BITS;
qp--;
n1 = n0;
}
/* Lowest dividend limb: nothing below it to shift in. */
udiv_qrnnd (*qp, r, r, n1 << norm, d);
r >>= GMP_NAIL_BITS;
qp--;
}
for (i = qxn - 1; i >= 0; i--)
{
udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
r >>= GMP_NAIL_BITS;
qp--;
}
/* Undo the normalization shift on the remainder. */
return r >> norm;
}
else
{
/* Multiply-by-inverse on the shifted operands; same structure as the
plain variant just above. */
mp_limb_t dinv;
invert_limb (dinv, d);
if (un != 0)
{
n1 = up[un - 1] << GMP_NAIL_BITS;
r |= (n1 >> (GMP_LIMB_BITS - norm));
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
udiv_qrnnd_preinv (*qp, r, r,
((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
d, dinv);
r >>= GMP_NAIL_BITS;
qp--;
n1 = n0;
}
udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
r >>= GMP_NAIL_BITS;
qp--;
}
for (i = qxn - 1; i >= 0; i--)
{
udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
r >>= GMP_NAIL_BITS;
qp--;
}
/* Undo the normalization shift on the remainder. */
return r >> norm;
}
}
}

View file

@ -1,181 +0,0 @@
/* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
quotient. The divisor is two limbs.
THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS
ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
RELEASE.
Copyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
meaning the quotient size where that should happen, the quotient size
being how many udiv divisions will be done.
The default is to use preinv always, CPUs where this doesn't suit have
tuned thresholds. Note in particular that preinv should certainly be
used if that's the only division available (USE_PREINV_ALWAYS). */
#ifndef DIVREM_2_THRESHOLD
#define DIVREM_2_THRESHOLD 0
#endif
/* Divide num (NP/NSIZE) by den (DP/2) and write
the NSIZE-2 least significant quotient limbs at QP
and the 2 limb long remainder at NP. If QEXTRA_LIMBS is
non-zero, generate that many fraction limbs and append them after the
other quotient limbs.
Return the most significant limb of the quotient, this is always 0 or 1.
Preconditions:
0. NSIZE >= 2.
1. The most significant bit of the divisor must be set.
2. QP must either not overlap with the input operands at all, or
QP + 2 >= NP must hold true. (This means that it's
possible to put the quotient in the high part of NUM, right after the
remainder in NUM.)
3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero. */
/* Divide {np,nn}, extended below by qxn zero limbs, by the two-limb divisor
   at dp.
   qp   receives the low nn-2+qxn quotient limbs (qp[i] for i from
   qxn+nn-3 down to 0).
   np   dividend; on return np[1],np[0] hold the two-limb remainder.
   dp   divisor, dp[1] being the high limb with its top numb bit set.
   Returns the most significant quotient limb, which is 0 or 1.  */
mp_limb_t
mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
mp_ptr np, mp_size_t nn,
mp_srcptr dp)
{
mp_limb_t most_significant_q_limb = 0;
mp_size_t i;
mp_limb_t n1, n0, n2;
mp_limb_t d1, d0;
mp_limb_t d1inv;
int use_preinv;
ASSERT (nn >= 2);
ASSERT (qxn >= 0);
ASSERT (dp[1] & GMP_NUMB_HIGHBIT);
ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp+2 >= np);
ASSERT_MPN (np, nn);
ASSERT_MPN (dp, 2);
/* Point np at the two most significant dividend limbs; n1:n0 is the
running two-limb partial remainder. */
np += nn - 2;
d1 = dp[1];
d0 = dp[0];
n1 = np[1];
n0 = np[0];
/* If the top two limbs are >= the divisor, subtract it once; that is the
(at most 1) most significant quotient limb. */
if (n1 >= d1 && (n1 > d1 || n0 >= d0))
{
#if GMP_NAIL_BITS == 0
sub_ddmmss (n1, n0, n1, n0, d1, d0);
#else
n0 = n0 - d0;
n1 = n1 - d1 - (n0 >> GMP_LIMB_BITS - 1);
n0 &= GMP_NUMB_MASK;
#endif
most_significant_q_limb = 1;
}
/* Choose between division by precomputed inverse and plain udiv_qrnnd
according to the number of quotient limbs to develop. */
use_preinv = ABOVE_THRESHOLD (qxn + nn - 2, DIVREM_2_THRESHOLD);
if (use_preinv)
invert_limb (d1inv, d1);
/* One quotient limb per iteration, most significant first. */
for (i = qxn + nn - 2 - 1; i >= 0; i--)
{
mp_limb_t q;
mp_limb_t r;
/* Bring in the next lower dividend limb: a real one while in the integer
part, a zero stored at np[0] once in the fraction part. */
if (i >= qxn)
np--;
else
np[0] = 0;
if (n1 == d1)
{
/* Q should be either 111..111 or 111..110. Need special handling
of this rare case as normal division would give overflow. */
q = GMP_NUMB_MASK;
r = (n0 + d1) & GMP_NUMB_MASK;
if (r < d1) /* Carry in the addition? */
{
#if GMP_NAIL_BITS == 0
add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
#else
n0 = np[0] + d0;
n1 = (r - d0 + (n0 >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
n0 &= GMP_NUMB_MASK;
#endif
qp[i] = q;
continue;
}
n1 = d0 - (d0 != 0);
n0 = -d0 & GMP_NUMB_MASK;
}
else
{
/* Estimate q from the high divisor limb only, then form q*d0 in n1:n0
for the correction test below. */
if (use_preinv)
udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);
else
udiv_qrnnd (q, r, n1, n0 << GMP_NAIL_BITS, d1 << GMP_NAIL_BITS);
r >>= GMP_NAIL_BITS;
umul_ppmm (n1, n0, d0, q << GMP_NAIL_BITS);
n0 >>= GMP_NAIL_BITS;
}
n2 = np[0];
/* Compare n1:n0 against r:n2; while it is larger the estimate q is too
big and is decremented. */
q_test:
if (n1 > r || (n1 == r && n0 > n2))
{
/* The estimated Q was too large. */
q--;
#if GMP_NAIL_BITS == 0
sub_ddmmss (n1, n0, n1, n0, 0, d0);
#else
n0 = n0 - d0;
n1 = n1 - (n0 >> GMP_LIMB_BITS - 1);
n0 &= GMP_NUMB_MASK;
#endif
r += d1;
if (r >= d1) /* If not carry, test Q again. */
goto q_test;
}
qp[i] = q;
/* New partial remainder: r:n2 minus n1:n0. */
#if GMP_NAIL_BITS == 0
sub_ddmmss (n1, n0, r, n2, n1, n0);
#else
n0 = n2 - n0;
n1 = r - n1 - (n0 >> GMP_LIMB_BITS - 1);
n0 &= GMP_NUMB_MASK;
#endif
}
/* np has been stepped down once per integer quotient limb (nn-2 times), so
it is back at the caller's NP; store the two-limb remainder there. */
np[1] = n1;
np[0] = n0;
return most_significant_q_limb;
}

View file

@ -1,37 +0,0 @@
/* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.
Copyright 1994, 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "gmp.h"
#include "gmp-impl.h"
/* popham.c is a template that is included twice: first with
   OPERATION_popcount set, then with OPERATION_hamdist set, so that this
   single translation unit provides both functions. */
#define OPERATION_popcount 1
#define OPERATION_hamdist 0
#include "popham.c"
/* Reset the selectors, and FNAME / POPHAM (presumably defined inside
   popham.c for the selected operation -- confirm there), before the second
   inclusion. */
#undef OPERATION_popcount
#undef OPERATION_hamdist
#undef FNAME
#undef POPHAM
#define OPERATION_popcount 0
#define OPERATION_hamdist 1
#include "popham.c"

View file

@ -1,175 +0,0 @@
; Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
; Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; C prototypes of the two entry points generated per mac_sub invocation:
;   mp_limb_t <name>_n  (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
;                        mp_size_t size);
;   mp_limb_t <name>_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
;                        mp_size_t size, mp_limb_t carry);
;
; The PARAM_* offsets are relative to esp plus "frame", the number of bytes
; pushed since entry.  NOTE(review): FR_push / FR_pop come from x86i.inc and
; are assumed to adjust "frame" by 4 alongside the push/pop -- confirm there.
%define PARAM_SPACE 20
%define PARAM_CARRY esp+frame+20
%define PARAM_SIZE esp+frame+16
%define PARAM_SRC2 esp+frame+12
%define PARAM_SRC1 esp+frame+8
%define PARAM_DST esp+frame+4
; mac_sub prefix, op, name_n, name_nc
;   %1  symbol prefix for both entry points
;   %2  the with-carry instruction applied limb by limb (adc or sbb)
;   %3  name of the entry point without carry-in
;   %4  name of the entry point with carry-in (5th argument)
; Both entry points share the 8-way unrolled loop at %%4 and return the
; final carry/borrow flag (0 or 1) in eax.
%macro mac_sub 4
global %1%4
%ifdef DLL
export %1%4
%endif
align 8
; ---- entry point with carry-in ----
%1%4:
%assign frame 0
FR_push edi
FR_push esi
mov edi,[PARAM_DST]
mov esi,[PARAM_SRC1]
mov edx,[PARAM_SRC2]
mov ecx,[PARAM_SIZE]
mov eax,ecx
shr ecx,3 ; compute count for unrolled loop
neg eax
and eax,7 ; get index where to start loop
jz %%3 ; necessary special case for 0
inc ecx ; adjust loop count
shl eax,2 ; adjustment for pointers...
sub edi,eax ; ... since they are offset ...
sub esi,eax ; ... by a constant when we ...
sub edx,eax ; ... enter the loop
shr eax,2 ; restore previous value
; Calculate start address in loop: eax*9 bytes past %%4, less 3.
; NOTE(review): this relies on each unrolled step assembling to 9 bytes,
; with the first step 3 bytes shorter -- confirm against the listing.
%ifdef PIC
call %%1
%%1:
lea eax,[%%4-%%1-3+eax+eax*8]
add eax,[esp]
add esp,4
%else
lea eax,[%%4-3+eax+eax*8]
%endif
; These lines initialize carry from the 5th parameter. Should be
; possible to simplify.
FR_push ebp
mov ebp,[PARAM_CARRY]
shr ebp,1 ; shift bit 0 into carry
FR_pop ebp
jmp eax ; jump into loop
global %1%3
%ifdef DLL
export %1%3
%endif
align 8
; ---- entry point without carry-in ----
%1%3:
%assign frame 0
FR_push edi
FR_push esi
mov edi,[PARAM_DST]
mov esi,[PARAM_SRC1]
mov edx,[PARAM_SRC2]
mov ecx,[PARAM_SIZE]
mov eax,ecx
shr ecx,3 ; compute count for unrolled loop
neg eax
and eax,7 ; get index where to start loop (also clears carry)
jz %%4 ; necessary special case for 0
inc ecx ; adjust loop count
shl eax,2 ; adjustment for pointers...
sub edi,eax ; ... since they are offset ...
sub esi,eax ; ... by a constant when we ...
sub edx,eax ; ... enter the loop
shr eax,2 ; restore previous value
; Calculate start address in loop; the PIC variant obtains the current
; address with a call/pop pair and adds the label difference to it.
%ifdef PIC
call %%2
%%2:
lea eax,[%%4-%%2-3+eax+eax*8]
add eax,[esp]
add esp,4
%else
lea eax,[%%4-3+eax+eax*8]
%endif
jmp eax ; jump into loop
; Carry-in path for a size that is a multiple of 8: load the carry flag
; from the 5th argument, then fall through into the top of the loop.
%%3:
FR_push ebp
mov ebp,[PARAM_CARRY]
shr ebp,1 ; shift bit 0 into carry
FR_pop ebp
align 8
; Unrolled loop: 8 limbs per iteration.  Only mov/lea/dec are used between
; the %2 instructions so the carry flag survives from limb to limb.
%%4:
mov eax,[esi]
%2 eax,[edx]
mov [edi],eax
mov eax,[4+esi]
%2 eax,[edx+4]
mov [4+edi],eax
mov eax,[8+esi]
%2 eax,[edx+8]
mov [8+edi],eax
mov eax,[12+esi]
%2 eax,[edx+12]
mov [12+edi],eax
mov eax,[16+esi]
%2 eax,[edx+16]
mov [16+edi],eax
mov eax,[20+esi]
%2 eax,[edx+20]
mov [20+edi],eax
mov eax,[24+esi]
%2 eax,[edx+24]
mov [24+edi],eax
mov eax,[28+esi]
%2 eax,[edx+28]
mov [28+edi],eax
lea edi,[32+edi]
lea esi,[32+esi]
lea edx,[32+edx]
dec ecx
jnz %%4
; Turn the final carry flag into the return value: eax = 0 or 1.
sbb eax,eax
neg eax
pop esi
pop edi
ret
%endmacro
section .text
; Instantiate the addition (adc) and subtraction (sbb) variants.
mac_sub ___g,adc,mpn_add_n,mpn_add_nc
mac_sub ___g,sbb,mpn_sub_n,mpn_sub_nc
end

View file

@ -1,113 +0,0 @@
; Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002 Free Software
; Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; Argument offsets: (dst, src, size, multiplier).  "frame" is fixed at 16
; because every PARAM_* reference below comes after the four register
; pushes at the top of the routine.
%define PARAM_MULTIPLIER esp+frame+16
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
%assign frame 16
; mac_sub prefix, op, name
;   %1  symbol prefix
;   %2  instruction applied to the destination limb (add or sub)
;   %3  function name
; For each of the size limbs: dst[i] = dst[i] %2 low(src[i] * multiplier),
; with the high product word and any carry propagated to the next limb.
; The final carry limb is returned in eax.
%macro mac_sub 3
global %1%3
%ifdef DLL
export %1%3
%endif
align 8
%1%3:
push edi
push esi
push ebx
push ebp
mov edi,[PARAM_DST]
mov esi,[PARAM_SRC]
mov ecx,[PARAM_SIZE]
xor ebx,ebx
; ebx holds the carry limb.  First handle size mod 4 limbs one at a time.
and ecx,3
jz %%2
%%1:
mov eax,[esi]
mul dword [PARAM_MULTIPLIER]
lea esi,[4+esi]
add eax,ebx
mov ebx,0
adc edx,ebx
%2 [edi],eax
adc ebx,edx ; propagate carry into cylimb
lea edi,[4+edi]
dec ecx
jnz %%1
; Then the remaining size/4 groups of four limbs.
%%2:
mov ecx,[PARAM_SIZE]
shr ecx,2
jz %%4
align 8
; Unrolled by four; ebx and ebp alternate as the carry limb so the next
; multiply can start before the previous limb has been stored.
%%3:
mov eax,[esi]
mul dword [PARAM_MULTIPLIER]
add ebx,eax
mov ebp,0
adc ebp,edx
mov eax,[4+esi]
mul dword [PARAM_MULTIPLIER]
%2 [edi],ebx
adc ebp,eax ; new lo + cylimb
mov ebx,0
adc ebx,edx
mov eax,[8+esi]
mul dword [PARAM_MULTIPLIER]
%2 [4+edi],ebp
adc ebx,eax ; new lo + cylimb
mov ebp,0
adc ebp,edx
mov eax,[12+esi]
mul dword [PARAM_MULTIPLIER]
%2 [8+edi],ebx
adc ebp,eax ; new lo + cylimb
mov ebx,0
adc ebx,edx
%2 [12+edi],ebp
adc ebx,0 ; propagate carry into cylimb
lea esi,[16+esi]
lea edi,[16+edi]
dec ecx
jnz %%3
; Return the carry limb.
%%4:
mov eax,ebx
pop ebp
pop ebx
pop esi
pop edi
ret
%endmacro
section .text
; Instantiate the multiply-accumulate and multiply-subtract variants.
mac_sub ___g,add,mpn_addmul_1
mac_sub ___g,sub,mpn_submul_1
end

View file

@ -1,64 +0,0 @@
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; ___gmpn_copyd (dst, src, size)
; Copies size 32-bit limbs from src to dst, starting at the highest limb and
; working downwards (direction flag set during the copy).
global ___gmpn_copyd
%ifdef DLL
export ___gmpn_copyd
%endif
; Nothing is pushed, so the argument offsets use frame 0 throughout.
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
%assign frame 0
; eax saved esi
; ebx
; ecx counter
; edx saved edi
; esi src
; edi dst
; ebp
section .text
align 32
___gmpn_copyd:
mov ecx,[PARAM_SIZE]
; esi and edi are kept in eax and edx instead of on the stack.
mov eax,esi
mov esi,[PARAM_SRC]
mov edx,edi
mov edi,[PARAM_DST]
; Point both registers at the last limb of their operand.
lea esi,[-4+esi+ecx*4]
lea edi,[-4+edi+ecx*4]
; Copy downwards, then clear the direction flag again.
std
rep movsd
cld
mov esi,eax
mov edi,edx
ret
end

View file

@ -1,59 +0,0 @@
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; ___gmpn_copyi (dst, src, size)
; Copies size 32-bit limbs from src to dst, starting at the lowest limb and
; working upwards.
global ___gmpn_copyi
%ifdef DLL
export ___gmpn_copyi
%endif
; Nothing is pushed, so the argument offsets use frame 0 throughout.
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
%assign frame 0
section .text
align 32
; eax saved esi
; ebx
; ecx counter
; edx saved edi
; esi src
; edi dst
; ebp
___gmpn_copyi:
mov ecx,[PARAM_SIZE]
; esi and edi are kept in eax and edx instead of on the stack.
mov eax,esi
mov esi,[PARAM_SRC]
mov edx,edi
mov edi,[PARAM_DST]
cld ; better safe than sorry,see mpn/x86/README
rep movsd
mov esi,eax
mov edi,edx
ret
end

View file

@ -1,144 +0,0 @@
; Copyright 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; ___gmpn_divexact_1 (dst, src, size, divisor)
; Exact division of the size-limb number at src by divisor, by multiplying
; with the inverse of the odd part of the divisor modulo 2^32; the quotient
; limbs are stored at dst.
; NOTE(review): the code has no remainder handling, so the division is
; presumably required to be exact -- confirm against the caller contract.
%define PARAM_DIVISOR esp+frame+16
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
; The src argument slot is reused to hold the computed inverse once src has
; been loaded into esi.
%define VAR_INVERSE PARAM_SRC
%assign frame 0
section .text
extern ___gmp_modlimb_invert_table
global ___gmpn_divexact_1
%ifdef DLL
export ___gmpn_divexact_1
%endif
align 16
___gmpn_divexact_1:
mov eax,[PARAM_DIVISOR]
FR_push ebp
mov ebp,[PARAM_SIZE]
FR_push edi
FR_push ebx
mov ecx,-1 ; shift count
FR_push esi
; Count the trailing zero bits of the divisor into ecx and shift them out.
Lstrip_twos:
inc ecx
shr eax,1
jnc Lstrip_twos
lea ebx,[1+eax+eax] ; d without twos
and eax,127 ; d/2,7 bits
; Look up an 8-bit inverse of the odd divisor in the table.
%ifdef PIC
call Lmovl_eip_edx
add edx,_GLOBAL_OFFSET_TABLE_
mov edx,[___gmp_modlimb_invert_table+edx]
movzx eax,byte [eax+edx] ; inv 8 bits
%else
movzx eax,byte [___gmp_modlimb_invert_table+eax] ; inv 8 bits
%endif
; Two refinement steps inv = 2*inv - inv*inv*d, interleaved with loading
; the pointers and setting up the negative loop counter.
lea edx,[eax+eax] ; 2*inv
mov [PARAM_DIVISOR],ebx ; d without twos
imul eax,eax ; inv*inv
mov esi,[PARAM_SRC]
mov edi,[PARAM_DST]
imul eax,ebx ; inv*inv*d
sub edx,eax ; inv = 2*inv - inv*inv*d
lea eax,[edx+edx] ; 2*inv
imul edx,edx ; inv*inv
lea esi,[esi+ebp*4] ; src end
lea edi,[edi+ebp*4] ; dst end
neg ebp ; -size
imul edx,ebx ; inv*inv*d
sub eax,edx ; inv = 2*inv - inv*inv*d
; Debug check that inv*d == 1.  Only the compare is performed here; nothing
; acts on its result.
%ifdef ASSERT
FR_push eax
imul eax,[PARAM_DIVISOR]
cmp eax,1
FR_pop eax
%endif
mov [VAR_INVERSE],eax
mov eax,[esi+ebp*4] ; src[0]
xor ebx,ebx
xor edx,edx
inc ebp
jz Lone
; More than one limb: form the first shifted limb and enter the loop in
; the middle.
mov edx,[esi+ebp*4] ; src[1]
shrd eax,edx,cl
mov edx,[VAR_INVERSE]
jmp Lentry
align 8
nop ; k6 code alignment
nop
; eax q
; ebx carry bit,0 or -1
; ecx shift
; edx carry limb
; esi src end
; edi dst end
; ebp counter,limbs,negative
Ltop:
mov eax,[-4+esi+ebp*4]
sub edx,ebx ; accumulate carry bit
mov ebx,[esi+ebp*4]
shrd eax,ebx,cl
sub eax,edx ; apply carry limb
mov edx,[VAR_INVERSE]
sbb ebx,ebx
Lentry:
; q = limb * inverse; store it, then q * d leaves the carry limb for the
; next iteration in edx.
imul eax,edx
mov [-4+edi+ebp*4],eax
mov edx,[PARAM_DIVISOR]
mul edx
inc ebp
jnz Ltop
mov eax,[-4+esi] ; src high limb
; Final (or only) limb: there is no higher limb to shift bits in from.
Lone:
shr eax,cl
FR_pop esi
add eax,ebx ; apply carry bit
FR_pop ebx
sub eax,edx ; apply carry limb
imul eax,[VAR_INVERSE]
mov [-4+edi],eax
pop edi
pop ebp
ret
%ifdef PIC
; Helper for position-independent code: returns its own return address in
; edx.
Lmovl_eip_edx:
mov edx,[esp]
ret
%endif
end

View file

@ -1,88 +0,0 @@
; Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; ___gmpn_divexact_by3c (dst, src, size, carry)
; Divides the size-limb number at src by 3 using multiplication by the
; inverse of 3 modulo 2^32, storing the quotient limbs at dst.  The carry
; argument is subtracted from the lowest limb, and the carry left after the
; highest limb is returned in eax.
%define PARAM_CARRY esp+frame+16
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
%assign frame 0
; multiplicative inverse of 3,modulo 2^32
; ceil(b/3) and ceil(b*2/3) where b=2^32
%define INVERSE_3 0xAAAAAAAB
%define ONE_THIRD_CEIL 0x55555556
%define TWO_THIRDS_CEIL 0xAAAAAAAB
section .text
global ___gmpn_divexact_by3c
%ifdef DLL
export ___gmpn_divexact_by3c
%endif
align 8
___gmpn_divexact_by3c:
mov ecx,[PARAM_SRC]
FR_push ebp
mov ebp,[PARAM_SIZE]
FR_push edi
mov edi,[PARAM_DST]
FR_push esi
mov esi,INVERSE_3
FR_push ebx
; Point ecx/edi past the end of the operands and count ebp up from -size
; to zero.
lea ecx,[ecx+ebp*4]
mov ebx,[PARAM_CARRY]
lea edi,[edi+ebp*4]
neg ebp
; eax scratch,low product
; ebx carry limb (0 to 3)
; ecx &src[size]
; edx scratch,high product
; esi multiplier
; edi &dst[size]
; ebp counter,limbs,negative
align 8
Ltop:
mov eax,[ecx+ebp*4]
sub eax,ebx
; The borrow of the subtraction becomes the new low part of the carry.
setc bl
; One-operand imul: the low product word in eax is the quotient limb; edx
; receives the high word, which is not used.
imul esi
cmp eax,ONE_THIRD_CEIL
mov [edi+ebp*4],eax
sbb ebx,-1 ; +1 if eax>=ceil(b/3)
cmp eax,TWO_THIRDS_CEIL
sbb ebx,-1 ; +1 if eax>=ceil(b*2/3)
inc ebp
jnz Ltop
; Return the final carry.
mov eax,ebx
pop ebx
pop esi
pop edi
pop ebp
ret
end

View file

@ -1,140 +0,0 @@
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; Argument offsets for (dst, xsize, src, size, divisor [, carry]); "frame"
; is the number of bytes pushed since entry.
%define PARAM_CARRY esp+frame+24
%define PARAM_DIVISOR esp+frame+20
%define PARAM_SIZE esp+frame+16
%define PARAM_SRC esp+frame+12
%define PARAM_XSIZE esp+frame+8
%define PARAM_DST esp+frame+4
section .text
; ___gmpn_divrem_1c (dst, xsize, src, size, divisor, carry)
; Same division as ___gmpn_divrem_1, but the initial remainder (edx) is
; taken from the carry argument.  After its own setup this entry point
; jumps into the Linteger_top / Lfraction code of ___gmpn_divrem_1.
global ___gmpn_divrem_1c
%ifdef DLL
export ___gmpn_divrem_1c
%endif
align 16
___gmpn_divrem_1c:
%assign frame 0
mov ecx,[PARAM_SIZE]
FR_push edi
mov edi,[PARAM_SRC]
FR_push esi
mov esi,[PARAM_DIVISOR]
FR_push ebx
mov ebx,[PARAM_DST]
FR_push ebp
mov ebp,[PARAM_XSIZE]
; Test size; the mov below does not disturb the flags for the jz.
or ecx,ecx
mov edx,[PARAM_CARRY]
jz Lfraction
lea ebx,[-4+ebx+ebp*4] ; dst one limb below integer part
jmp Linteger_top
; ___gmpn_divrem_1 (dst, xsize, src, size, divisor)
; Divides the size-limb number at src by the single limb divisor.  The size
; integer quotient limbs go to dst[xsize .. xsize+size-1]; xsize further
; "fraction" quotient limbs, developed by continuing the division with zero
; limbs, go to dst[0 .. xsize-1].  The remainder is returned in eax.
; When size is zero the xsize fraction limbs are all zero and the remainder
; is zero (see Lsize_zero).
global ___gmpn_divrem_1
%ifdef DLL
export ___gmpn_divrem_1
%endif
align 16
___gmpn_divrem_1:
%assign frame 0
mov ecx,[PARAM_SIZE]
FR_push edi
mov edi,[PARAM_SRC]
FR_push esi
mov esi,[PARAM_DIVISOR]
or ecx,ecx
jz Lsize_zero
FR_push ebx
mov eax,[-4+edi+ecx*4] ; src high limb
xor edx,edx
mov ebx,[PARAM_DST]
FR_push ebp
mov ebp,[PARAM_XSIZE]
cmp eax,esi
lea ebx,[-4+ebx+ebp*4] ; dst one limb below integer part
jae Linteger_entry
; high<divisor,so high of dst is zero,and avoid one div
mov [ebx+ecx*4],edx
dec ecx
mov edx,eax
; flags are still those of the dec above (mov does not change them)
jz Lfraction
; eax scratch (quotient)
; ebx dst+4*xsize-4
; ecx counter
; edx scratch (remainder)
; esi divisor
; edi src
; ebp xsize
; Integer part: one div per source limb, high to low.  edx carries the
; remainder from one limb into the next.
Linteger_top:
mov eax,[-4+edi+ecx*4]
Linteger_entry:
div esi
mov [ebx+ecx*4],eax
dec ecx
jnz Linteger_top
; Fraction part.  ecx is zero here, so the or loads xsize into ecx and
; tests it for zero at the same time.
Lfraction:
or ecx,ebp
jz Ldone
mov ebx,[PARAM_DST]
; eax scratch (quotient)
; ebx dst
; ecx counter
; edx scratch (remainder)
; esi divisor
; edi
; ebp
Lfraction_top:
xor eax,eax
div esi
mov [-4+ebx+ecx*4],eax
dec ecx
jnz Lfraction_top
Ldone:
pop ebp
mov eax,edx
pop ebx
pop esi
pop edi
ret
; size == 0: the quotient consists of xsize zero limbs and the remainder is
; zero.  This label is reached from the top of ___gmpn_divrem_1 when only
; edi and esi have been pushed, whereas the assembly-time "frame" has since
; been advanced by the two further FR_push lines above.  Reset it to the 8
; bytes actually on the stack, otherwise PARAM_XSIZE and PARAM_DST address
; the wrong argument slots and the fraction limbs are never cleared.
Lsize_zero:
%assign frame 8
mov ecx,[PARAM_XSIZE]
xor eax,eax
mov edi,[PARAM_DST]
cld ; better safe than sorry,see mpn/x86/README
rep stosd
pop esi
pop edi
ret
end

View file

@ -1,42 +0,0 @@
/* Generic x86 gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#if !defined (GMP_MPARAM_H)
#define GMP_MPARAM_H

/* Limb width in bits.  A value supplied from outside is accepted only if it
   agrees with the 32 this header is written for. */
#ifdef BITS_PER_MP_LIMB
#if BITS_PER_MP_LIMB != 32
#error Bad configuration in gmp-mparam.h
#endif
#else
#define BITS_PER_MP_LIMB 32
#endif

/* Limb width in bytes, checked the same way against 4. */
#ifdef BYTES_PER_MP_LIMB
#if BYTES_PER_MP_LIMB != 4
#error Bad configuration in gmp-mparam.h
#endif
#else
#define BYTES_PER_MP_LIMB 4
#endif

/* Generic x86 mpn_divexact_1 is faster than generic x86 mpn_divrem_1 on all
   of p5, p6, k6 and k7, so use it always. It's probably slower on 386 and
   486, but that's too bad. */
#define DIVEXACT_1_THRESHOLD 0

#endif

View file

@ -1,86 +0,0 @@
; Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
; Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; ___gmpn_lshift: shift a multi-limb number left by PARAM_SHIFT bits.
;
; Arguments are read from the stack; the offsets below are relative to esp
; plus the running `frame` adjustment:
;   PARAM_DST    destination limb pointer
;   PARAM_SRC    source limb pointer
;   PARAM_SIZE   number of 32-bit limbs (src[size-1] is read unconditionally,
;                so size == 0 is not handled)
;   PARAM_SHIFT  shift count, consumed through cl by shld/shl
; On return eax holds the bits shifted out of the most significant limb.
; Limbs are processed from the most significant one down to limb 0.
%define PARAM_SHIFT esp+frame+16
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
section .text
global ___gmpn_lshift
%ifdef DLL
export ___gmpn_lshift
%endif
align 8
___gmpn_lshift:
push edi
push esi
push ebx
; account for the three pushes above so the PARAM_* offsets stay valid
%assign frame frame+12
mov edi,[PARAM_DST]
mov esi,[PARAM_SRC]
mov edx,[PARAM_SIZE]
mov ecx,[PARAM_SHIFT]
sub esi,4 ; adjust src
mov ebx,[esi+edx*4] ; read most significant limb
xor eax,eax
; eax = top `cl` bits of the most significant limb (the carry-out limb)
shld eax,ebx,cl
dec edx
; size == 1: only the least significant limb remains to be written
jz Lend
push eax ; push carry limb onto stack
; the loop handles two limbs per pass; pick the entry point from the
; parity of the remaining count
test dl,1
jnz L1 ; enter Lop in the middle
mov eax,ebx
align 8
; Two-limb unrolled loop.  eax and ebx alternate between "higher limb" and
; "next lower limb"; shld combines them into one destination limb.
Lop:
mov ebx,[esi+edx*4] ; load next lower limb
shld eax,ebx,cl
mov [edi+edx*4],eax ; store it
dec edx
L1:
mov eax,[esi+edx*4]
shld ebx,eax,cl
mov [edi+edx*4],ebx
dec edx
jnz Lop
shl eax,cl ; compute least significant limb
mov [edi],eax ; store it
pop eax ; pop carry limb
pop ebx
pop esi
pop edi
ret
; Single-limb case: ebx still holds src[0] and eax already holds the
; carry-out bits, so nothing was pushed for the carry limb.
Lend:
shl ebx,cl ; compute least significant limb
mov [edi],ebx ; store it
pop ebx
pop esi
pop edi
ret
end

View file

@ -1,99 +0,0 @@
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; Stack-argument offsets shared by ___gmpn_mod_1c and ___gmpn_mod_1
; (relative to esp plus the running `frame` adjustment).  PARAM_CARRY is
; only read by the _1c entry point.
%define PARAM_CARRY esp+frame+16
%define PARAM_DIVISOR esp+frame+12
%define PARAM_SIZE esp+frame+8
%define PARAM_SRC esp+frame+4
section .text
global ___gmpn_mod_1c
%ifdef DLL
export ___gmpn_mod_1c
%endif
align 16
; ___gmpn_mod_1c: remainder of {src,size} divided by a single-limb divisor,
; starting from an initial carry (remainder) value.
;   size == 0 : the carry is returned unchanged in eax
;   size != 0 : control transfers to the Ltop division loop further down in
;               this file with ebx=src, ecx=size, edx=carry, esi=divisor
; NOTE(review): `div` faults unless edx < esi, so callers presumably pass
; carry < divisor -- confirm against the callers.
; FR_push comes from x86i.inc (not shown here); presumably it pushes the
; register and bumps `frame` so the PARAM_* offsets stay valid -- confirm.
___gmpn_mod_1c:
%assign frame 0
mov ecx,[PARAM_SIZE]
FR_push ebx
mov ebx,[PARAM_SRC]
FR_push esi
mov esi,[PARAM_DIVISOR]
; set ZF from size; the following mov does not alter flags
or ecx,ecx
mov edx,[PARAM_CARRY]
jnz Ltop
; size == 0: result is the incoming carry
pop esi
mov eax,edx
pop ebx
ret
global ___gmpn_mod_1
%ifdef DLL
export ___gmpn_mod_1
%endif
align 16
; ___gmpn_mod_1: remainder of {src,size} divided by a single-limb divisor.
; Reads PARAM_SRC, PARAM_SIZE and PARAM_DIVISOR from the stack and leaves
; the remainder in eax.  size == 0 yields 0.
; Limbs are divided from the most significant downwards, carrying the
; running remainder in edx.  The Ltop loop is also entered directly from
; ___gmpn_mod_1c.
___gmpn_mod_1:
%assign frame 0
mov ecx,[PARAM_SIZE]
FR_push ebx
mov ebx,[PARAM_SRC]
FR_push esi
or ecx,ecx
jz Ldone_zero
mov esi,[PARAM_DIVISOR]
mov eax,[-4+ebx+ecx*4] ; src high limb
; If the high limb is already below the divisor it is its own remainder,
; so it can seed edx and one `div` is saved.
cmp eax,esi
sbb edx,edx ; -1 if high<divisor
add ecx,edx ; skip one division if high<divisor
; nothing left to divide: eax (the high limb) is the answer
jz Ldone_eax
and edx,eax ; carry if high<divisor
; eax scratch (quotient)
; ebx src
; ecx counter
; edx carry (remainder)
; esi divisor
; edi
; ebp
Ltop:
mov eax,[-4+ebx+ecx*4]
; edx:eax / esi -> quotient in eax (discarded), remainder in edx
div esi
dec ecx
jnz Ltop
mov eax,edx
Ldone_eax:
pop esi
pop ebx
ret
Ldone_zero:
pop esi
xor eax,eax
pop ebx
ret
end

View file

@ -1,141 +0,0 @@
; Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
section .text
; ___gmpn_mod_34lsub1: fold {src,size} into a single value that is congruent
; to the input modulo 2^24-1.  The result in eax is a sum of 24-bit (or
; smaller) pieces and is not fully reduced.
;
; Since 2^24 == 1 (mod 2^24-1), limb weights repeat with period three:
;   limb 0 mod 3 has weight 1, limb 1 mod 3 has weight 2^8,
;   limb 2 mod 3 has weight 2^16.
; Three accumulators collect those classes and are combined at the end.
%define PARAM_SIZE esp+frame+8
%define PARAM_SRC esp+frame+4
; re-use parameter space
%define SAVE_EBX PARAM_SRC
global ___gmpn_mod_34lsub1
%ifdef DLL
export ___gmpn_mod_34lsub1
%endif
align 16
___gmpn_mod_34lsub1:
%assign frame 0
mov ecx,[PARAM_SIZE]
mov edx,[PARAM_SRC]
; flags from this subtraction drive both branches below (the mov in
; between does not modify flags)
sub ecx,2
ja Lthree_or_more
mov eax,[edx]
; size == 1: src[0] is returned as is
jb Lone
; size == 2: combine the two limbs directly
mov ecx,[4+edx]
mov edx,eax
shr eax,24 ; src[0] high (top 8 bits)
and edx,0xFFFFFF ; src[0] low (24 bits)
add eax,edx
mov edx,ecx
and ecx,0xFFFF
shr edx,16 ; src[1] high
add eax,edx
shl ecx,8 ; src[1] low
add eax,ecx
Lone:
ret
; eax
; ebx
; ecx size-2
; edx src
; esi
; edi
; ebp
Lthree_or_more:
; ebx is saved in the PARAM_SRC stack slot (src is already in edx)
mov [SAVE_EBX],ebx ; and arrange 16-byte loop alignment
xor ebx,ebx
FR_push esi
xor esi,esi
FR_push edi
xor eax,eax ; and clear carry flag
; offset 0x40 here
; eax acc 0mod3
; ebx acc 1mod3
; ecx counter,limbs
; edx src
; esi acc 2mod3
; edi
; ebp
; Three limbs per pass.  Pointer and counter updates use lea/dec because
; they leave the carry flag alone, so the adc chain carries from one
; accumulator into the next and across iterations.
Ltop:
lea edx,[12+edx]
lea ecx,[-2+ecx]
adc eax,[-12+edx]
adc ebx,[-8+edx]
adc esi,[-4+edx]
dec ecx
jg Ltop
; ecx is -2,-1 or 0 representing 0,1 or 2 more limbs,respectively
; edi becomes the negated weight of the pending carry bit, chosen by which
; accumulator produced the last carry: -1, -2^8 or -2^16.
mov edi,0xFFFFFFFF
inc ecx
js Lcombine
adc eax,[edx]
mov edi,0xFFFFFF00
dec ecx
js Lcombine
adc ebx,[4+edx]
mov edi,0xFFFF0000
; eax acc 0mod3
; ebx acc 1mod3
; ecx
; edx
; esi acc 2mod3
; edi mask
; ebp
Lcombine:
; ecx = -1 if a carry is pending, else 0; masked with edi and subtracted,
; this adds the carry's weight to the result
sbb ecx,ecx ; carry
mov edx,eax ; 0mod3
shr eax,24 ; 0mod3 high
and ecx,edi ; carry masked
sub eax,ecx ; apply carry
mov edi,ebx ; 1mod3
shr ebx,16 ; 1mod3 high
and edx,0x00FFFFFF ; 0mod3 low
add eax,edx ; apply 0mod3 low
and edi,0xFFFF
shl edi,8 ; 1mod3 low
add eax,ebx ; apply 1mod3 high
add eax,edi ; apply 1mod3 low
mov edx,esi ; 2mod3
shr esi,8 ; 2mod3 high
and edx,0xFF ; 2mod3 low
shl edx,16 ; 2mod3 low
add eax,esi ; apply 2mod3 high
add eax,edx ; apply 2mod3 low
FR_pop edi
; reload ebx from the parameter slot it was parked in
mov ebx,[SAVE_EBX]
FR_pop esi
ret
end

View file

@ -1,107 +0,0 @@
; Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001, 2002 Free Software
; Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; ___gmpn_mul_1: multiply {src,size} by a single limb and store the low
; limbs of the running product at dst.
;
; Stack arguments (offsets relative to esp plus `frame`):
;   PARAM_DST, PARAM_SRC, PARAM_SIZE, PARAM_MULTIPLIER
; On return eax holds the final carry (most significant) limb.
; The first size mod 4 limbs are handled one at a time in Loop0, and the
; remaining size/4 groups in the four-way unrolled Lop loop.
%define PARAM_MULTIPLIER esp+frame+16
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
%assign frame 0
section .text
global ___gmpn_mul_1
%ifdef DLL
export ___gmpn_mul_1
%endif
align 8
___gmpn_mul_1:
push edi
push esi
push ebx
push ebp
; four registers pushed: keep the PARAM_* offsets valid
%assign frame frame+16
mov edi,[PARAM_DST]
mov esi,[PARAM_SRC]
mov ecx,[PARAM_SIZE]
; ebx is the carry limb, initially zero
xor ebx,ebx
and ecx,3
jz Lend0
; One limb per iteration: dst[i] = low(src[i]*m + carry)
Loop0:
mov eax,[esi]
mul dword [PARAM_MULTIPLIER]
lea esi,[4+esi]
add eax,ebx
; mov (unlike xor) keeps the carry flag from the add above
mov ebx,0
adc edx,ebx
mov [edi],eax
mov ebx,edx ; propagate carry into cylimb
lea edi,[4+edi]
dec ecx
jnz Loop0
Lend0:
mov ecx,[PARAM_SIZE]
shr ecx,2
jz Lend
align 8
; Four limbs per iteration; ebx and ebp alternate as the carry limb so each
; product's high half can be accumulated while the previous limb is stored.
Lop:
mov eax,[esi]
mul dword [PARAM_MULTIPLIER]
add ebx,eax
mov ebp,0
adc ebp,edx
mov eax,[4+esi]
mul dword [PARAM_MULTIPLIER]
mov [edi],ebx
add ebp,eax ; new lo + cylimb
mov ebx,0
adc ebx,edx
mov eax,[8+esi]
mul dword [PARAM_MULTIPLIER]
mov [4+edi],ebp
add ebx,eax ; new lo + cylimb
mov ebp,0
adc ebp,edx
mov eax,[12+esi]
mul dword [PARAM_MULTIPLIER]
mov [8+edi],ebx
add ebp,eax ; new lo + cylimb
mov ebx,0
adc ebx,edx
mov [12+edi],ebp
lea esi,[16+esi]
lea edi,[16+edi]
dec ecx
jnz Lop
Lend:
; final carry limb
mov eax,ebx
pop ebp
pop ebx
pop esi
pop edi
ret
end

View file

@ -1,176 +0,0 @@
; Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software
; Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; ___gmpn_mul_basecase: schoolbook multiplication of {xp,xsize} by
; {yp,ysize}, writing the product to wp.
;
; The first row (x * yp[0]) is stored to wp; every further row
; (x * yp[i]) is added into wp one limb higher, and the row's carry limb is
; stored just above it.
;
; Stack arguments: PARAM_WP, PARAM_XP, PARAM_XSIZE, PARAM_YP, PARAM_YSIZE.
; Two local stack slots are reserved below the return address:
;   VAR_MULTIPLIER  current yp[i]
;   VAR_COUNTER     rows still to process
; The PARAM_YP stack slot is advanced in place as rows are consumed.
%define VAR_STACK_SPACE 8
%define PARAM_YSIZE esp+frame+20
%define PARAM_YP esp+frame+16
%define PARAM_XSIZE esp+frame+12
%define PARAM_XP esp+frame+8
%define PARAM_WP esp+frame+4
%define VAR_MULTIPLIER esp+frame-4
%define VAR_COUNTER esp+frame-8
section .text
global ___gmpn_mul_basecase
%ifdef DLL
export ___gmpn_mul_basecase
%endif
align 8
___gmpn_mul_basecase:
sub esp,VAR_STACK_SPACE
push esi
push ebp
push edi
; locals plus three pushed registers
%assign frame VAR_STACK_SPACE+12
mov esi,[PARAM_XP]
mov edi,[PARAM_WP]
mov ebp,[PARAM_YP]
mov eax,[esi] ; load xp[0]
mul dword [ebp] ; multiply by yp[0]
mov [edi],eax ; store to wp[0]
mov ecx,[PARAM_XSIZE] ; xsize
dec ecx ; If xsize = 1,ysize = 1 too
jz Ldone
; ebx is only saved on this path; Ldone therefore does not pop it
FR_push ebx
mov ebx,edx
lea esi,[4+esi]
lea edi,[4+edi]
; First row: wp[j] = low(xp[j]*yp[0] + carry), carry kept in ebx
LoopM:
mov eax,[esi] ; load next limb at xp[j]
lea esi,[4+esi]
mul dword [ebp]
add eax,ebx
mov ebx,edx
adc ebx,0
mov [edi],eax
lea edi,[4+edi]
dec ecx
jnz LoopM
mov [edi],ebx ; most significant limb of product
add edi,4 ; increment wp
; rewind xp to its start and wp to one limb above its start
mov eax,[PARAM_XSIZE]
shl eax,2
sub edi,eax
sub esi,eax
mov eax,[PARAM_YSIZE] ; ysize
dec eax
jz Lskip
mov [VAR_COUNTER],eax ; remaining rows = ysize-1
; One pass per remaining y limb: wp += x * yp[i]
Louter:
mov ebp,[PARAM_YP] ; yp
add ebp,4 ; make ebp point to next v limb
mov [PARAM_YP],ebp
mov eax,[ebp] ; copy y limb ...
mov [VAR_MULTIPLIER],eax ; ... to stack slot
mov ecx,[PARAM_XSIZE]
xor ebx,ebx
; handle xsize mod 4 limbs singly, then the rest four at a time
and ecx,3
jz Lend0
Loop0:
mov eax,[esi]
mul dword [VAR_MULTIPLIER]
lea esi,[4+esi]
add eax,ebx
; mov (unlike xor) keeps the carry flag from the add above
mov ebx,0
adc edx,ebx
add [edi],eax
adc ebx,edx ; propagate carry into cylimb
lea edi,[4+edi]
dec ecx
jnz Loop0
Lend0:
mov ecx,[PARAM_XSIZE]
shr ecx,2
jz LendX
align 8
; Four limbs per iteration; ebx and ebp alternate as the carry limb.
LoopX:
mov eax,[esi]
mul dword [VAR_MULTIPLIER]
add ebx,eax
mov ebp,0
adc ebp,edx
mov eax,[4+esi]
mul dword [VAR_MULTIPLIER]
add [edi],ebx
adc ebp,eax ; new lo + cylimb
mov ebx,0
adc ebx,edx
mov eax,[8+esi]
mul dword [VAR_MULTIPLIER]
add [4+edi],ebp
adc ebx,eax ; new lo + cylimb
mov ebp,0
adc ebp,edx
mov eax,[12+esi]
mul dword [VAR_MULTIPLIER]
add [8+edi],ebx
adc ebp,eax ; new lo + cylimb
mov ebx,0
adc ebx,edx
add [12+edi],ebp
adc ebx,0 ; propagate carry into cylimb
lea esi,[16+esi]
lea edi,[16+edi]
dec ecx
jnz LoopX
LendX:
; store this row's carry limb above the limbs just updated
mov [edi],ebx
add edi,4
; we incremented wp and xp in the loop above; compensate
mov eax,[PARAM_XSIZE]
shl eax,2
sub edi,eax
sub esi,eax
mov eax,[VAR_COUNTER]
dec eax
mov [VAR_COUNTER],eax
jnz Louter
Lskip:
pop ebx
pop edi
pop ebp
pop esi
add esp,8
ret
; xsize == 1 (and hence ysize == 1): the product is just edx:eax
Ldone:
mov [4+edi],edx ; store to wp[1]
pop edi
pop ebp
pop esi
add esp,8
ret
end

View file

@ -1,226 +0,0 @@
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\x86i.inc"
; mul_fun: macro that generates a multiply-and-accumulate routine pair,
; instantiated at the end of this block for mpn_addmul_1 / mpn_addmul_1c
; and mpn_submul_1 / mpn_submul_1c.
;
; Macro parameters:
;   %1  symbol prefix
;   %2  memory operation applied to dst ("add" or "sub")
;   %3  name of the plain entry point (initial carry 0)
;   %4  name of the "_1c" entry point (initial carry from PARAM_CARRY)
;
; Each generated routine computes dst[i] = dst[i] (%2) src[i]*multiplier
; over PARAM_SIZE limbs and leaves the final carry/borrow limb in eax.
; Sizes below UNROLL_THRESHOLD use the simple loop; larger sizes use a
; 16-way unrolled loop entered through a computed jump.
%define UNROLL_LOG2 4
%define UNROLL_COUNT (1 << UNROLL_LOG2)
%define UNROLL_MASK UNROLL_COUNT-1
%define UNROLL_BYTES 4*UNROLL_COUNT
; both configurations currently use the same threshold
%ifdef PIC
%define UNROLL_THRESHOLD 5
%else
%define UNROLL_THRESHOLD 5
%endif
%define PARAM_CARRY esp+frame+20
%define PARAM_MULTIPLIER esp+frame+16
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
%macro mul_fun 4
global %1%3
global %1%4
%ifdef DLL
export %1%3
export %1%4
%endif
align 32
%define frame 0
; "_1c" entry: caller supplies the initial carry limb
%1%4:
FR_push ebx
mov ebx,[PARAM_CARRY]
jmp %%Lstart_nc
%define frame 0
; plain entry: initial carry is zero
%1%3:
FR_push ebx
xor ebx,ebx ; initial carry
%%Lstart_nc:
mov ecx,[PARAM_SIZE]
FR_push esi
mov esi,[PARAM_SRC]
FR_push edi
mov edi,[PARAM_DST]
FR_push ebp
; the mov between cmp and jae does not modify flags
cmp ecx,UNROLL_THRESHOLD
mov ebp,[PARAM_MULTIPLIER]
jae %%Lunroll
; simple loop
; this is offset 0x22,so close enough to aligned
; eax scratch
; ebx carry
; ecx counter
; edx scratch
; esi src
; edi dst
; ebp multiplier
%%Lsimple:
mov eax,[esi]
add edi,4
mul ebp
add eax,ebx
adc edx,0
; apply the low product limb to dst; its carry/borrow joins the next carry
%2 [edi-4],eax
mov ebx,edx
adc ebx,0
dec ecx
lea esi,[4+esi]
jnz %%Lsimple
pop ebp
pop edi
pop esi
mov eax,ebx
pop ebx
ret
; VAR_JUMP holds the computed jump temporarily because there's not enough
; registers when doing the mul for the initial two carry limbs.
;
; The add/adc for the initial carry in %ebx is necessary only for the
; mpn_add/submul_1c entry points. Duplicating the startup code to
; eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
; idea.
;
; overlapping with parameters already fetched
%define VAR_COUNTER PARAM_SIZE
%define VAR_JUMP PARAM_DST
; VAL1 is a +128 bias added to the src/dst pointers and VAL2 the matching
; -128 applied to every displacement in the unrolled body, so the two cancel.
; VAL1 = ifelse(UNROLL_BYTES,256,128)
%define VAL1 128
; VAL2 = ifelse(UNROLL_BYTES,256,-128)
%define VAL2 -128
; this is offset 0x43,so close enough to aligned
; eax
; ebx initial carry
; ecx size
; edx
; esi src
; edi dst
; ebp
%%Lunroll:
; VAR_COUNTER = (size-2) >> UNROLL_LOG2, the number of extra full passes.
; ecx = (-(size-1)) & UNROLL_MASK, the number of limb slots to skip at the
; start of the unrolled body.
mov edx,ecx
dec ecx
sub edx,2
neg ecx
shr edx,UNROLL_LOG2
and ecx,UNROLL_MASK
mov [VAR_COUNTER],edx
mov edx,ecx
; 15 code bytes per limb
; jump target = Lentry + 16*ecx - ecx = Lentry + 15*ecx
%ifdef PIC
; call/pop idiom to obtain the current address for a position-independent
; jump target
call %%Lhere
%%Lhere:
shl edx,4
neg ecx
lea edx,[edx+ecx*1]
add edx,%%Lentry-%%Lhere
add edx,[esp]
add esp,4
%else
shl edx,4
neg ecx
lea edx,[%%Lentry+edx+ecx]
%endif
mov eax,[esi] ; src low limb
mov [VAR_JUMP],edx
; ecx is now negative: pre-adjust src/dst for the skipped slots
lea esi,[VAL1+4+esi+ecx*4]
mul ebp
add eax,ebx ; initial carry (from _1c)
adc edx,0
mov ebx,edx ; high carry
lea edi,[VAL1+edi+ecx*4]
mov edx,[VAR_JUMP]
; the roles of ecx/ebx as low/high carry depend on whether the entry slot
; is the first or second limb of a two-limb chunk
test ecx,1
mov ecx,eax ; low carry
cmovnz ecx,ebx
cmovnz ebx,eax
jmp edx
; eax scratch
; ebx carry hi
; ecx carry lo
; edx scratch
; esi src
; edi dst
; ebp multiplier
;
; VAR_COUNTER loop counter
;
; 15 code bytes per limb
%define CHUNK_COUNT 2
align 32
%%Ltop:
add edi,UNROLL_BYTES
%%Lentry:
%assign disp VAL2
; the `byte` displacement size keeps every limb's code the same length,
; which the computed jump above relies on
%rep UNROLL_COUNT/CHUNK_COUNT
mov eax,[byte disp+esi]
mul ebp
%2 [byte disp+edi],ecx
adc ebx,eax
mov ecx,edx
adc ecx,0
mov eax,[byte disp+4+esi]
mul ebp
%2 [byte disp+4+edi],ebx
adc ecx,eax
mov ebx,edx
adc ebx,0
%assign disp disp+4*CHUNK_COUNT
%endrep
dec dword [VAR_COUNTER]
lea esi,[UNROLL_BYTES+esi]
jns %%Ltop
; apply the last pending low limb, then fold its carry/borrow into the
; returned high limb (the pops in between do not modify flags)
%assign disp UNROLL_BYTES+VAL2
%2 [disp+edi],ecx
mov eax,ebx
pop ebp
pop edi
pop esi
pop ebx
adc eax,0
ret
%endmacro
section .text
; instantiate the add and sub variants
mul_fun ___g,add,mpn_addmul_1,mpn_addmul_1c
mul_fun ___g,sub,mpn_submul_1,mpn_submul_1c
end

View file

@ -1,119 +0,0 @@
; Copyright 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\x86i.inc"
; ___gmpn_copyd: copy PARAM_SIZE limbs from PARAM_SRC to PARAM_DST, working
; from the high limb downwards so that an overlap with dst above src is
; handled correctly.  size == 0 copies nothing.
;
; esi and edi are saved in the PARAM_SIZE and PARAM_SRC stack slots once
; those arguments have been loaded, so no extra stack space is used.
global ___gmpn_copyd
%ifdef DLL
export ___gmpn_copyd
%endif
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
%define SAVE_ESI PARAM_SIZE
%define SAVE_EDI PARAM_SRC
%define frame 0
section .text
align 16
___gmpn_copyd:
mov ecx,[PARAM_SIZE]
mov [SAVE_ESI],esi
mov esi,[PARAM_SRC]
mov [SAVE_EDI],edi
mov edi,[PARAM_DST]
; flags from this sub select size==0 (jb) and size==1 (jz); the load in
; between does not modify flags
sub ecx,1
jb Lzero
mov eax,[esi+ecx*4] ; src[size-1]
jz Lone
mov edx,[-4+esi+ecx*4] ; src[size-2]
sub ecx,2
jbe Ldone_loop ; 2 or 3 limbs only
; The usual overlap is
;
; high low
; +------------------+
; | dst|
; +------------------+
; +------------------+
; | src|
; +------------------+
;
; We can use an incrementing copy in the following circumstances.
;
; src+4*size<=dst,since then the regions are disjoint
;
; src==dst,clearly (though this shouldn't occur normally)
;
; src>dst,since in that case it's a requirement of the
; parameters that src>=dst+size*4,and hence the
; regions are disjoint
;
; eax prev high limb
; ebx
; ecx counter,size-3 down to 0 or -1,inclusive,by 2s
; edx prev low limb
; esi src
; edi dst
; ebp
; edx = src + 4*size (ecx is size-3 here, hence the +12).  This was
; previously computed from edi, giving dst+4*(size-3), which can never be
; <= dst for ecx >= 1, so the disjoint "src below dst" case never reached
; the rep movsd path that the comments above describe.
lea edx,[12+esi+ecx*4]
cmp esi,edi
jae Luse_movsl ; src >= dst
cmp edx,edi
mov edx,[4+esi+ecx*4] ; src[size-2] again
jbe Luse_movsl ; src+4*size <= dst
; Decrementing copy, two limbs per pass, reading one pass ahead of the
; stores.
Ltop:
mov [8+edi+ecx*4],eax
mov eax,[esi+ecx*4]
mov [4+edi+ecx*4],edx
mov edx,[-4+esi+ecx*4]
sub ecx,2
jnbe Ltop
Ldone_loop:
mov [8+edi+ecx*4],eax
mov [4+edi+ecx*4],edx
; copy low limb (needed if size was odd,but will already have been
; done in the loop if size was even)
mov eax,[esi]
Lone:
mov [edi],eax
mov edi,[SAVE_EDI]
mov esi,[SAVE_ESI]
ret
; Regions known not to overlap harmfully: restore the full count and use
; an incrementing string copy.
Luse_movsl:
add ecx,3
cld
rep movsd
Lzero:
mov esi,[SAVE_ESI]
mov edi,[SAVE_EDI]
ret
end

View file

@ -1,192 +0,0 @@
; Copyright 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\x86i.inc"
; ___gmpn_divexact_1: divide {src,size} by a single-limb divisor, writing
; the quotient limbs to dst.  The method multiplies by the inverse of the
; divisor modulo 2^32, so the quotient is only meaningful when the division
; is exact.
;
; Outline:
;   1. strip the trailing zero bits from the divisor (count kept in ecx)
;   2. look up an 8-bit inverse of the odd part and refine it twice with
;      inv = 2*inv - inv*inv*d
;   3. run the "odd" loop when no bits were stripped, otherwise the "even"
;      loop, which also shifts the source right by the stripped bit count
;
; The PARAM_DIVISOR stack slot is overwritten with the odd part of the
; divisor, and PARAM_DST with &dst[size].
extern ___gmp_modlimb_invert_table
global ___gmpn_divexact_1
%ifdef DLL
export ___gmpn_divexact_1
%endif
%define PARAM_DIVISOR esp+frame+16
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
%define SAVE_EBX esp+frame-4
%define SAVE_ESI esp+frame-8
%define SAVE_EDI esp+frame-12
%define SAVE_EBP esp+frame-16
%define VAR_INVERSE esp+frame-20
%define STACK_SPACE 20
%define frame 0
section .text
align 16
___gmpn_divexact_1:
mov eax,[PARAM_DIVISOR]
sub esp,STACK_SPACE
; FR_sesp comes from x86i.inc (not shown here); presumably it adds
; STACK_SPACE to `frame` to match the sub above -- confirm.
FR_sesp STACK_SPACE
mov [SAVE_ESI],esi
mov esi,[PARAM_SRC]
mov [SAVE_EBX],ebx
mov ebx,[PARAM_SIZE]
bsf ecx,eax ; trailing twos
mov [SAVE_EBP],ebp
shr eax,cl ; d without twos
mov edx,eax
shr eax,1 ; d/2 without twos
mov [PARAM_DIVISOR],edx
; table index = (odd divisor >> 1) & 127
and eax,127
%ifdef PIC
; NOTE(review): this PIC-only load indexes with edx, which holds the odd
; divisor at this point; it looks like a mistranslated GOT-relative load.
; Confirm before building with PIC defined.
call Lmovl_eip_ebp
add ebp,_GLOBAL_OFFSET_TABLE_
mov ebp,[___gmp_modlimb_invert_table+edx+ebp]
movzx ebp,byte [eax+ebp] ; inv 8 bits
%else
movzx ebp,byte [___gmp_modlimb_invert_table+eax] ; inv 8 bits
%endif
; Two refinement steps of inv = 2*inv - inv*inv*d, interleaved with pointer
; setup.  Each step doubles the number of correct low bits (8 -> 16 -> 32).
lea eax,[ebp+ebp] ; 2*inv
imul ebp,ebp ; inv*inv
mov [SAVE_EDI],edi
mov edi,[PARAM_DST]
lea esi,[esi+ebx*4] ; src end
imul ebp,[PARAM_DIVISOR] ; inv*inv*d
sub eax,ebp ; inv = 2*inv - inv*inv*d
lea ebp,[eax+eax] ; 2*inv
imul eax,eax ; inv*inv
lea edi,[edi+ebx*4] ; dst end
neg ebx ; -size
mov [PARAM_DST],edi
imul eax,[PARAM_DIVISOR] ; inv*inv*d
sub ebp,eax ; inv = 2*inv - inv*inv*d
mov [VAR_INVERSE],ebp
mov eax,[esi+ebx*4] ; src[0]
; ecx still holds the stripped bit count: non-zero selects the even path
or ecx,ecx
jnz Leven
jmp Lodd_entry ; ecx initial carry is zero
; The dependent chain here is
;
; subl %edx,%eax 1
; imull %ebp,%eax 4
; mull PARAM_DIVISOR 5
; ----
; total 10
;
; and this is the measured speed. No special scheduling is necessary,out
; of order execution hides the load latency.
;
; eax scratch (src limb)
; ebx counter,limbs,negative
; ecx carry bit
; edx carry limb,high of last product
; esi &src[size]
; edi &dst[size]
Lodd_top:
; edx = high limb of (previous quotient limb * d): the amount to subtract
; from the next source limb
mul dword [PARAM_DIVISOR]
mov eax,[esi+ebx*4]
sub eax,ecx
sbb ecx,ecx
sub eax,edx
sbb ecx,0
Lodd_entry:
; quotient limb = adjusted source limb * inverse (mod 2^32)
imul eax,[VAR_INVERSE]
mov [edi+ebx*4],eax
; turn the 0/-1 borrow into a 0/1 carry bit
neg ecx
inc ebx
jnz Lodd_top
mov esi,[SAVE_ESI]
mov edi,[SAVE_EDI]
mov ebp,[SAVE_EBP]
mov ebx,[SAVE_EBX]
add esp,STACK_SPACE
ret
; eax src[0]
; ebx counter,limbs,negative
; ecx shift
Leven:
xor ebp,ebp ; initial carry bit
xor edx,edx ; initial carry limb (for size==1)
inc ebx
; size == 1: edi still holds &dst[size] for the store in Leven_one
jz Leven_one
mov edi,[esi+ebx*4] ; src[1]
; shift the stripped factor-of-two bits out, pulling bits in from src[1]
shrd eax,edi,cl
jmp Leven_entry
; eax scratch
; ebx counter,limbs,negative
; ecx shift
; edx scratch
; esi &src[size]
; edi &dst[size] and scratch
; ebp carry bit
Leven_top:
mov edi,[esi+ebx*4]
mul dword [PARAM_DIVISOR]
mov eax,[-4+esi+ebx*4]
shrd eax,edi,cl
sub eax,ebp
sbb ebp,ebp
sub eax,edx
sbb ebp,0
Leven_entry:
imul eax,[VAR_INVERSE]
; edi was used as scratch above; reload &dst[size]
mov edi,[PARAM_DST]
neg ebp
mov [-4+edi+ebx*4],eax
inc ebx
jnz Leven_top
mul dword [PARAM_DIVISOR]
mov eax,[-4+esi]
; Final (most significant) limb: nothing above it to shift in, so a plain
; shr is used.
Leven_one:
shr eax,cl
mov esi,[SAVE_ESI]
sub eax,ebp
mov ebp,[SAVE_EBP]
sub eax,edx
mov ebx,[SAVE_EBX]
imul eax,[VAR_INVERSE]
mov [-4+edi],eax
mov edi,[SAVE_EDI]
add esp,STACK_SPACE
ret
%ifdef PIC
; helper for the PIC path: returns its own return address in ebp
Lmovl_eip_ebp:
mov ebp,[esp]
ret
%endif
end

View file

@ -1,149 +0,0 @@
; Copyright 2000, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\x86i.inc"
; ___gmpn_divexact_by3c: divide {src,size} by 3 using multiplication by the
; inverse of 3 modulo 2^32, starting from an initial carry.
;
; Stack arguments: PARAM_DST, PARAM_SRC, PARAM_SIZE, PARAM_CARRY.
; Quotient limbs are written to dst; eax holds the final carry on return.
; For each limb the incoming carry is subtracted, the result is multiplied
; by INVERSE_3, and the new carry is the borrow from that subtraction plus
; one for each of the thresholds ceil(b/3) and ceil(2b/3) that the quotient
; limb reaches.
; size == 1 is handled by a separate short path; there is no check for
; size == 0.
global ___gmpn_divexact_by3c
%ifdef DLL
export ___gmpn_divexact_by3c
%endif
%define PARAM_CARRY esp+frame+16
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
%define frame 0
; multiplicative inverse of 3,modulo 2^32
; ceil(b/3),ceil(b*2/3) and floor(b*2/3) where b=2^32
%define INVERSE_3 0xAAAAAAAB
%define ONE_THIRD_CEIL 0x55555556
%define TWO_THIRDS_CEIL 0xAAAAAAAB
%define TWO_THIRDS_FLOOR 0xAAAAAAAA
section .text
align 8
___gmpn_divexact_by3c:
mov ecx,[PARAM_SRC]
mov edx,[PARAM_SIZE]
dec edx
jnz Ltwo_or_more
; size == 1: no callee-saved registers are needed
mov edx,[ecx]
mov eax,[PARAM_CARRY] ; risk of cache bank clash here
mov ecx,[PARAM_DST]
sub edx,eax
sbb eax,eax ; 0 or -1
imul edx,edx,INVERSE_3
neg eax ; 0 or 1
; sbb eax,-1 adds 1 when the preceding cmp left the carry flag clear
cmp edx,ONE_THIRD_CEIL
sbb eax,-1 ; +1 if edx>=ceil(b/3)
cmp edx,TWO_THIRDS_CEIL
sbb eax,-1 ; +1 if edx>=ceil(b*2/3)
mov [ecx],edx
ret
; eax
; ebx
; ecx src
; edx size-1
; esi
; edi
; ebp
Ltwo_or_more:
FR_push ebx
FR_push esi
FR_push edi
FR_push ebp
mov edi,[PARAM_DST]
mov esi,[PARAM_CARRY]
mov eax,[ecx] ; src low limb
xor ebx,ebx
sub eax,esi
; mov and lea leave the borrow from the sub intact for the adc below
mov esi,TWO_THIRDS_FLOOR
lea ecx,[ecx+edx*4] ; &src[size-1]
lea edi,[edi+edx*4] ; &dst[size-1]
adc ebx,0 ; carry,0 or 1
neg edx ; -(size-1)
; The loop needs a source limb ready at the top,which leads to one limb
; handled separately at the end,and the special case above for size==1.
; There doesn't seem to be any scheduling that would keep the speed but move
; the source load and carry subtract up to the top.
;
; The destination cache line prefetching adds 1 cycle to the loop but is
; considered worthwhile. The slowdown is a factor of 1.07,but will prevent
; repeated write-throughs if the destination isn't in L1. A version using
; an outer loop to prefetch only every 8 limbs (a cache line) proved to be
; no faster,due to unavoidable branch mispredictions in the inner loop.
;
; setc is 2 cycles on P54,so an adcl is used instead. If the movl $0,%ebx
; could be avoided then the src limb fetch could pair up and save a cycle.
; This would probably mean going to a two limb loop with the carry limb
; alternately positive or negative,since an sbbl %ebx,%ebx will leave a
; value which is in the opposite sense to the preceding sbbl/adcl %ebx,%eax.
;
; A register is used for TWO_THIRDS_FLOOR because a cmp can't be done as
; "cmpl %edx,$n" with the immediate as the second operand.
;
; The "4" source displacement is in the loop rather than the setup because
; this gets Ltop aligned to 8 bytes at no cost.
; eax source limb,carry subtracted
; ebx carry (0 or 1)
; ecx &src[size-1]
; edx counter,limbs,negative
; esi TWO_THIRDS_FLOOR
; edi &dst[size-1]
; ebp scratch (result limb)
align 8
Ltop:
imul ebp,eax,INVERSE_3
cmp ebp,ONE_THIRD_CEIL
mov eax,[edi+edx*4] ; dst cache line prefetch
sbb ebx,-1 ; +1 if ebp>=ceil(b/3)
; carry flag set here when ebp > TWO_THIRDS_FLOOR; the sbb below consumes
; it together with ebx
cmp esi,ebp
mov eax,[4+ecx+edx*4] ; next src limb
sbb eax,ebx ; and further -1 if ebp>=ceil(b*2/3)
mov ebx,0
adc ebx,0 ; new carry
mov [edi+edx*4],ebp
inc edx
jnz Ltop
; last limb: same steps, with the carry accumulated for the return value
imul edx,eax,INVERSE_3
cmp edx,ONE_THIRD_CEIL
mov [edi],edx
sbb ebx,-1 ; +1 if edx>=ceil(b/3)
cmp edx,TWO_THIRDS_CEIL
sbb ebx,-1 ; +1 if edx>=ceil(b*2/3)
pop ebp
mov eax,ebx
pop edi
pop esi
pop ebx
ret
end

View file

@ -1,519 +0,0 @@
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\..\x86i.inc"
global ___gmpn_preinv_divrem_1
global ___gmpn_divrem_1c
global ___gmpn_divrem_1
%ifdef DLL
export ___gmpn_divrem_1c
export ___gmpn_divrem_1
%endif
; Total limb count (size+xsize) at or above which the multiply-by-inverse
; code is used instead of one `div` per limb (see the cmp at Lstart_1c).
%define MUL_THRESHOLD 4
; Stack arguments, addressed relative to esp.  `frame` is the number of bytes
; esp has been lowered since entry.  PARAM_PREINV_INVERSE and PARAM_CARRY
; share offset 24 because they are read by different entry points.
%define PARAM_PREINV_SHIFT esp+frame+28
%define PARAM_PREINV_INVERSE esp+frame+24
%define PARAM_CARRY esp+frame+24
%define PARAM_DIVISOR esp+frame+20
%define PARAM_SIZE esp+frame+16
%define PARAM_SRC esp+frame+12
%define PARAM_XSIZE esp+frame+8
%define PARAM_DST esp+frame+4
; Register save slots and local variables, all inside the STACK_SPACE bytes
; each entry point subtracts from esp.
%define SAVE_EBX esp+frame-4
%define SAVE_ESI esp+frame-8
%define SAVE_EDI esp+frame-12
%define SAVE_EBP esp+frame-16
%define VAR_NORM esp+frame-20
%define VAR_INVERSE esp+frame-24
%define VAR_SRC esp+frame-28
%define VAR_DST esp+frame-32
%define VAR_DST_STOP esp+frame-36
%define STACK_SPACE 36
%define frame 0
section .text
align 16
; mpn_preinv_divrem_1 entry: the caller supplies the inverse
; (PARAM_PREINV_INVERSE) and the normalization shift (PARAM_PREINV_SHIFT), so
; the bsr/div setup done at Lmul_by_inverse is not repeated here.  Loads the
; registers and VAR_* slots that Lstart_preinv expects and jumps there.
___gmpn_preinv_divrem_1:
mov ecx,[PARAM_XSIZE]
sub esp,STACK_SPACE
; FR_sesp is defined in x86i.inc (not visible here); presumably it adjusts
; `frame` for the sub above -- TODO confirm.
FR_sesp STACK_SPACE
mov [SAVE_ESI],esi
mov esi,[PARAM_SRC]
mov [SAVE_EBX],ebx
mov ebx,[PARAM_SIZE]
mov [SAVE_EBP],ebp
mov ebp,[PARAM_DIVISOR]
mov [SAVE_EDI],edi
mov edx,[PARAM_DST]
mov eax,[-4+esi+ebx*4] ; src high limb
xor edi,edi ; initial carry (if can't skip a div)
lea edx,[8+edx+ecx*4] ; &dst[xsize+2]
xor ecx,ecx
mov [VAR_DST_STOP],edx ; &dst[xsize+2]
cmp eax,ebp ; high cmp divisor
cmovc edi,eax ; high limb becomes the carry if high<divisor
cmovnc ecx,eax ; (the latter in case src==dst)
mov [-12+edx+ebx*4],ecx ; dst high limb
sbb ebx,0 ; skip one division if high<divisor
mov ecx,[PARAM_PREINV_SHIFT]
lea edx,[-8+edx+ebx*4] ; &dst[xsize+size]
mov eax,32
mov [VAR_DST],edx ; &dst[xsize+size]
shl ebp,cl ; d normalized
sub eax,ecx ; 32-shift
mov [VAR_NORM],ecx
movd mm7,eax ; rshift
mov eax,[PARAM_PREINV_INVERSE]
jmp Lstart_preinv
align 16
%define frame 0
; mpn_divrem_1c entry: starts from the caller's PARAM_CARRY as the initial
; remainder (edx) and goes straight to the shared code at Lstart_1c, without
; the "high limb < divisor" shortcut that mpn_divrem_1 performs.
___gmpn_divrem_1c:
mov edx,[PARAM_CARRY]
mov ecx,[PARAM_SIZE]
sub esp,STACK_SPACE
; esp moved, so PARAM_*/SAVE_* offsets are re-based from here on
%define frame STACK_SPACE
mov [SAVE_EBX],ebx
mov ebx,[PARAM_XSIZE]
mov [SAVE_EDI],edi
mov edi,[PARAM_DST]
mov [SAVE_EBP],ebp
mov ebp,[PARAM_DIVISOR]
mov [SAVE_ESI],esi
mov esi,[PARAM_SRC]
lea edi,[-4+edi+ebx*4] ; &dst[xsize-1]
jmp Lstart_1c
; offset 0x31,close enough to aligned
%define frame 0
; mpn_divrem_1 entry.  The initial carry is 0.  When size is non-zero and the
; high src limb is below the divisor, that limb becomes the carry, a zero
; quotient limb is stored in its place and one division is skipped.
___gmpn_divrem_1:
mov ecx,[PARAM_SIZE]
mov edx,0 ; initial carry (if can't skip a div)
sub esp,STACK_SPACE
; esp moved, so PARAM_*/SAVE_* offsets are re-based from here on
%define frame STACK_SPACE
mov [SAVE_EBP],ebp
mov ebp,[PARAM_DIVISOR]
mov [SAVE_EBX],ebx
mov ebx,[PARAM_XSIZE]
mov [SAVE_ESI],esi
mov esi,[PARAM_SRC]
or ecx,ecx ; size
; the mov/lea instructions below leave the flags from `or` untouched
mov [SAVE_EDI],edi
mov edi,[PARAM_DST]
lea edi,[-4+edi+ebx*4] ; &dst[xsize-1]
jz Lno_skip_div ; if size==0
mov eax,[-4+esi+ecx*4] ; src high limb
xor esi,esi
cmp eax,ebp ; high cmp divisor
cmovc edx,eax ; high limb becomes the carry if high<divisor
cmovnc esi,eax ; (the latter in case src==dst)
mov [edi+ecx*4],esi ; dst high limb
sbb ecx,0 ; size-1 if high<divisor
mov esi,[PARAM_SRC] ; reload
; Shared path for mpn_divrem_1 and mpn_divrem_1c.  When size+xsize is below
; MUL_THRESHOLD each limb is divided with a plain `div`; otherwise control
; goes to Lmul_by_inverse.
Lno_skip_div:
; eax
; ebx xsize
; ecx size
; edx carry
; esi src
; edi &dst[xsize-1]
; ebp divisor
Lstart_1c:
lea eax,[ebx+ecx] ; size+xsize
cmp eax,MUL_THRESHOLD
jae Lmul_by_inverse
or ecx,ecx
jz Ldivide_no_integer ; size==0, only fraction limbs (if any) remain
; eax scratch (quotient)
; ebx xsize
; ecx counter
; edx scratch (remainder)
; esi src
; edi &dst[xsize-1]
; ebp divisor
Ldivide_integer:
mov eax,[-4+esi+ecx*4]
div ebp ; edx:eax / divisor: quotient in eax, remainder in edx
mov [edi+ecx*4],eax
dec ecx
jnz Ldivide_integer
Ldivide_no_integer:
mov edi,[PARAM_DST]
or ebx,ebx
jnz Ldivide_fraction ; xsize != 0
; Restore the registers saved at entry and return the remainder in eax.
Ldivide_done:
mov esi,[SAVE_ESI]
mov edi,[SAVE_EDI]
mov ebx,[SAVE_EBX]
mov eax,edx
mov ebp,[SAVE_EBP]
add esp,STACK_SPACE
ret
; eax scratch (quotient)
; ebx counter
; ecx
; edx scratch (remainder)
; esi
; edi dst
; ebp divisor
; Fraction limbs for the plain-div path: each step divides remainder:0 by the
; divisor and stores the quotient limb, working down from dst[xsize-1].
Ldivide_fraction:
mov eax,0 ; low dword of the dividend is zero
div ebp
mov [-4+edi+ebx*4],eax
dec ebx
jnz Ldivide_fraction
jmp Ldivide_done
; eax
; ebx xsize
; ecx size
; edx carry
; esi src
; edi &dst[xsize-1]
; ebp divisor
; Setup for the multiply-by-inverse method: record the dst pointers, shift the
; divisor left by its leading-zero count l, and compute the inverse with a
; single `div`.  Falls through to Lstart_preinv with eax = inverse.
Lmul_by_inverse:
lea ebx,[12+edi] ; &dst[xsize+2],loop dst stop
mov [VAR_DST_STOP],ebx
lea edi,[4+edi+ecx*4] ; &dst[xsize+size]
mov [VAR_DST],edi
mov ebx,ecx ; size
bsr ecx,ebp ; 31-l
mov edi,edx ; carry
lea eax,[1+ecx] ; 32-l
xor ecx,31 ; l
mov [VAR_NORM],ecx
mov edx,-1
shl ebp,cl ; d normalized
movd mm7,eax ; rshift = 32-l
mov eax,-1
sub edx,ebp ; (b-d)-1 giving edx:eax = b*(b-d)-1
div ebp ; floor (b*(b-d)-1) / d
; eax inverse
; ebx size
; ecx shift
; edx
; esi src
; edi carry
; ebp divisor
;
; mm7 rshift
; Common start of the inverse-based division.  Saves the inverse, then
; dispatches on the number of integer limbs in ebx:
;   0 -> Lstart_zero, 1 -> Lstart_one, 2 -> Linteger_two_left,
;   3 or more -> Linteger_top.
Lstart_preinv:
mov [VAR_INVERSE],eax
or ebx,ebx ; size
; lea/mov below do not alter the flags set by `or`
lea eax,[-12+esi+ebx*4] ; &src[size-3]
mov [VAR_SRC],eax
jz Lstart_zero
mov esi,[8+eax] ; src high limb
cmp ebx,1
jz Lstart_one
Lstart_two_or_more:
mov edx,[4+eax] ; src second highest limb
shld edi,esi,cl ; n2 = (carry:high) << l
shld esi,edx,cl ; n10 = (high:second) << l
cmp ebx,2
je Linteger_two_left
jmp Linteger_top
Lstart_one:
shld edi,esi,cl ; n2 = (carry:high) << l
shl esi,cl ; n10 = high << l
jmp Linteger_one_left
Lstart_zero:
; Can be here with xsize==0 if mpn_preinv_divrem_1 had size==1 and
; skipped a division.
shl edi,cl ; n2 = carry << l
mov eax,edi ; return value for zero_done
cmp [PARAM_XSIZE],dword 0
je Lzero_done
jmp Lfraction_some
; This loop runs at about 25 cycles,which is probably sub-optimal,and
; certainly more than the dependent chain would suggest. A better loop,or
; a better rough analysis of what's possible,would be welcomed.
;
; In the current implementation,the following successively dependent
; micro-ops seem to exist.
;
; uops
; n2+n1 1 (addl)
; mul 5
; q1+1 3 (addl/adcl)
; mul 5
; sub 3 (subl/sbbl)
; addback 2 (cmov)
; ---
; 19
;
; Lack of registers hinders explicit scheduling and it might be that the
; normal out of order execution isn't able to hide enough under the mul
; latencies.
;
; Using sarl/negl to pick out n1 for the n2+n1 stage is a touch faster than
; cmov (and takes one uop off the dependent chain). A sarl/andl/addl
; combination was tried for the addback (despite the fact it would lengthen
; the dependent chain) but found to be no faster.
; eax scratch
; ebx scratch (nadj,q1)
; ecx scratch (src,dst)
; edx scratch
; esi n10
; edi n2
; ebp d
;
; mm0 scratch (src qword)
; mm7 rshift for normalization
align 16
; Main integer loop.  Each pass produces one quotient limb, stores it at
; VAR_DST-4 and repeats until VAR_DST reaches VAR_DST_STOP.  The next
; normalized src limb (n10) is formed with an MMX qword load and shift by mm7.
; The q1+1 == 0 overflow case is handled separately at Lq1_ff.
Linteger_top:
mov eax,esi
mov ebx,ebp
sar eax,31 ; -n1
mov ecx,[VAR_SRC]
and ebx,eax ; -n1 & d
neg eax ; n1
add ebx,esi ; nadj = n10 + (-n1 & d),ignoring overflow
add eax,edi ; n2+n1
movq mm0,[ecx] ; next src limb and the one below it
mul dword [VAR_INVERSE] ; m*(n2+n1)
sub ecx,4
mov [VAR_SRC],ecx
add eax,ebx ; m*(n2+n1) + nadj,low giving carry flag
mov eax,ebp ; d
lea ebx,[1+edi] ; n2+1
adc ebx,edx ; 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
jz Lq1_ff
mul ebx ; (q1+1)*d
mov ecx,[VAR_DST]
psrlq mm0,mm7
sub esi,eax
mov eax,[VAR_DST_STOP]
sbb edi,edx ; n - (q1+1)*d
mov edi,esi ; remainder -> n2
lea edx,[ebp+esi]
cmovc edi,edx ; add d back if (q1+1)*d was too large (carry from sbb)
movd esi,mm0
sbb ebx,0 ; q
sub ecx,4
mov [ecx],ebx
cmp ecx,eax
mov [VAR_DST],ecx
jne Linteger_top
Linteger_loop_done:
; Here, and in integer_one_left below, an sbbl $0 is used rather than a jz
; q1_ff special case. This makes the code a bit smaller and simpler, and
; costs only 2 cycles (each).
;
; Second-to-last integer limb: same quotient step as Linteger_top, but the
; next n10 is built from the single low src limb and the quotient goes to
; VAR_DST_STOP-4.  Falls through to Linteger_one_left.
; eax scratch
; ebx scratch (nadj,q1)
; ecx scratch (src,dst)
; edx scratch
; esi n10
; edi n2
; ebp divisor
;
; mm7 rshift
Linteger_two_left:
mov eax,esi
mov ebx,ebp
sar eax,31 ; -n1
mov ecx,[PARAM_SRC]
and ebx,eax ; -n1 & d
neg eax ; n1
add ebx,esi ; nadj = n10 + (-n1 & d),ignoring overflow
add eax,edi ; n2+n1
mul dword [VAR_INVERSE] ; m*(n2+n1)
movd mm0,[ecx] ; src low limb
mov ecx,[VAR_DST_STOP]
add eax,ebx ; m*(n2+n1) + nadj,low giving carry flag
lea ebx,[1+edi] ; n2+1
mov eax,ebp ; d
adc ebx,edx ; 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
sbb ebx,0 ; q1 if q1+1 overflowed
mul ebx ; (q1+1)*d
psllq mm0,32 ; low limb into the high dword, zeros below
psrlq mm0,mm7
sub esi,eax
sbb edi,edx ; n - (q1+1)*d
mov edi,esi ; remainder -> n2
lea edx,[ebp+esi]
cmovc edi,edx ; add d back on underflow
movd esi,mm0
sbb ebx,0 ; q
mov [-4+ecx],ebx
; eax scratch
; ebx scratch (nadj,q1)
; ecx scratch (dst)
; edx scratch
; esi n10
; edi n2
; ebp divisor
;
; mm7 rshift
; Last integer limb.  Stores the final integer quotient limb at
; VAR_DST_STOP-8, then either continues with the fraction limbs (xsize != 0)
; or returns the remainder, shifted back down by the normalization count.
Linteger_one_left:
mov eax,esi
mov ebx,ebp
sar eax,31 ; -n1
mov ecx,[VAR_DST_STOP]
and ebx,eax ; -n1 & d
neg eax ; n1
add ebx,esi ; nadj = n10 + (-n1 & d),ignoring overflow
add eax,edi ; n2+n1
mul dword [VAR_INVERSE] ; m*(n2+n1)
add eax,ebx ; m*(n2+n1) + nadj,low giving carry flag
lea ebx,[1+edi] ; n2+1
mov eax,ebp ; d
adc ebx,edx ; 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
sbb ebx,0 ; q1 if q1+1 overflowed
mul ebx
sub esi,eax
mov eax,[PARAM_XSIZE]
sbb edi,edx ; n - (q1+1)*d
mov edi,esi ; remainder -> n2
lea edx,[ebp+esi]
cmovc edi,edx ; add d back on underflow
sbb ebx,0 ; q
mov [-8+ecx],ebx
sub ecx,8
or eax,eax ; xsize
jnz Lfraction_some
mov eax,edi
Lfraction_done:
mov ecx,[VAR_NORM]
; Restore saved registers, denormalize the remainder in eax, leave MMX state.
Lzero_done:
mov ebp,[SAVE_EBP]
mov edi,[SAVE_EDI]
mov esi,[SAVE_ESI]
mov ebx,[SAVE_EBX]
add esp,STACK_SPACE
shr eax,cl ; remainder >> l
emms
ret
; Special case for q1=0xFFFFFFFF,giving q=0xFFFFFFFF meaning the low dword
; of q*d is simply -d and the remainder n-q*d = n10+d
;
; eax (divisor)
; ebx (q1+1 == 0)
; ecx
; edx
; esi n10
; edi n2
; ebp divisor
; Reached from Linteger_top when q1+1 wrapped to zero.  Stores a quotient limb
; of 0xFFFFFFFF, sets the next n2 to n10+d, fetches the next n10, and either
; re-enters the loop or leaves it.
Lq1_ff:
mov ecx,[VAR_DST]
mov edx,[VAR_DST_STOP]
sub ecx,4
mov [VAR_DST],ecx
psrlq mm0,mm7
lea edi,[ebp+esi] ; n-q*d remainder -> next n2
mov [ecx],dword -1
movd esi,mm0 ; next n10
cmp edx,ecx
jne Linteger_top
jmp Linteger_loop_done
;
; In the current implementation,the following successively dependent
; micro-ops seem to exist.
;
; uops
; mul 5
; q1+1 1 (addl)
; mul 5
; sub 3 (negl/sbbl)
; addback 2 (cmov)
; ---
; 16
;
; The loop in fact runs at about 17.5 cycles. Using a sarl/andl/addl for
; the addback was found to be a touch slower.
; eax
; ebx
; ecx
; edx
; esi
; edi carry
; ebp divisor
align 16
; Fraction limbs for the inverse-based path.  The low dividend limb is always
; zero here, so each step is a reduced form of the integer step.  Quotient
; limbs are stored downwards from &dst[xsize-1] until ecx reaches PARAM_DST.
Lfraction_some:
mov esi,[PARAM_DST]
mov ecx,[VAR_DST_STOP] ; &dst[xsize+2]
mov eax,edi
sub ecx,8 ; &dst[xsize]
; eax n2,then scratch
; ebx scratch (nadj,q1)
; ecx dst,decrementing
; edx scratch
; esi dst stop point
; edi n2
; ebp divisor
align 16
Lfraction_top:
mul dword [VAR_INVERSE] ; m*n2
mov eax,ebp ; d
sub ecx,4 ; dst
lea ebx,[edi+1]
add ebx,edx ; 1 + high(n2<<32 + m*n2) = q1+1
mul ebx ; (q1+1)*d
neg eax ; low of n - (q1+1)*d
sbb edi,edx ; high of n - (q1+1)*d,caring only about carry
lea edx,[ebp+eax]
cmovc eax,edx ; add d back on underflow
sbb ebx,0 ; q
mov edi,eax ; remainder->n2
cmp ecx,esi
mov [ecx],ebx ; previous q
jne Lfraction_top
jmp Lfraction_done
end

View file

@ -1,354 +0,0 @@
; Copyright 2001 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\..\x86i.inc"
global ___gmpn_lshift
%ifdef DLL
export ___gmpn_lshift
%endif
; Stack arguments.  `frame` is 8 because the entry code pushes ebx and edi
; before any PARAM_* is read.
%define PARAM_SHIFT esp+frame+16
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
%define frame 8
; minimum 5,because the unrolled loop can't handle less
%define UNROLL_THRESHOLD 5
section .text
align 8
; mpn_lshift entry: shift {src,size} left by `shift` bits into dst and return
; in eax the bits shifted out of the high limb.  Sizes of UNROLL_THRESHOLD or
; more go to Lunroll, size 1 is handled inline, the rest go to Lsimple.
___gmpn_lshift:
push ebx
push edi
mov eax,[PARAM_SIZE]
mov edx,[PARAM_DST]
mov ebx,[PARAM_SRC]
mov ecx,[PARAM_SHIFT]
cmp eax,UNROLL_THRESHOLD
jae Lunroll
mov edi,[-4+ebx+eax*4] ; src high limb
dec eax
jnz Lsimple
shld eax,edi,cl ; eax is zero here, so this yields the bits shifted out
shl edi,cl
mov [edx],edi ; dst low limb
pop edi ; risk of data cache bank clash
pop ebx
ret
; eax size-1
; ebx src
; ecx shift
; edx dst
; esi
; edi
; ebp
; Simple loop for 2 to UNROLL_THRESHOLD-1 limbs.  Works from the high end
; down: each step loads a qword spanning two adjacent src limbs, shifts it
; right by 32-shift and stores the low dword as one dst limb.
Lsimple:
movd mm5,[ebx+eax*4] ; src high limb
movd mm6,ecx ; lshift
neg ecx
psllq mm5,mm6
add ecx,32 ; 32-shift
movd mm7,ecx
psrlq mm5,32 ; retval
; eax counter,limbs,negative
; ebx src
; ecx
; edx dst
; esi
; edi
;
; mm0 scratch
; mm5 return value
; mm6 shift
; mm7 32-shift
Lsimple_top:
movq mm0,[ebx+eax*4-4]
dec eax
psrlq mm0,mm7
movd [4+edx+eax*4],mm0
jnz Lsimple_top ; flags are from the dec; MMX ops do not change them
movd mm0,[ebx]
movd eax,mm5
psllq mm0,mm6
pop edi
pop ebx
movd [edx],mm0 ; dst low limb
emms
ret
; eax size
; ebx src
; ecx shift
; edx dst
; esi
; edi
; ebp
align 8
; Unrolled path for size >= UNROLL_THRESHOLD.  Captures the return value in
; mm5, then, if the top of src is not 8-byte aligned, handles the high limb on
; its own so the qword loads that follow are aligned.
Lunroll:
movd mm5,[ebx+eax*4-4] ; src high limb
lea edi,[ebx+eax*4]
movd mm6,ecx ; lshift
and edi,4 ; ZF set if &src[size] is 0 mod 8
psllq mm5,mm6
jz Lstart_src_aligned ; flags are from the and; MMX ops do not change them
; src isn't aligned,process high limb separately (marked xxx) to
; make it so.
;
; source -8(ebx,%eax,4)
; |
; +-------+-------+-------+--
; | |
; +-------+-------+-------+--
; 0mod8 4mod8 0mod8
;
; dest
; -4(edx,%eax,4)
; |
; +-------+-------+--
; | xxx | |
; +-------+-------+--
movq mm0,[ebx+eax*4-8] ; unaligned load
psllq mm0,mm6
dec eax
psrlq mm0,32
movd [edx+eax*4],mm0
; src is now aligned.  Loads the top two src qwords and, if the top of dst is
; not 8-byte aligned, stores the high dst limb separately, steps dst down by 4
; and adds 32 to the shift so later qword stores are aligned.
Lstart_src_aligned:
movq mm1,[ebx+eax*4-8] ; src high qword
lea edi,[edx+eax*4]
and edi,4 ; ZF set if &dst[size] is 0 mod 8
psrlq mm5,32 ; return value
movq mm3,[ebx+eax*4-16] ; src second highest qword
jz Lstart_dst_aligned
; dst isn't aligned,subtract 4 to make it so,and pretend the shift
; is 32 bits extra. High limb of dst (marked xxx) handled here
; separately.
;
; source -8(ebx,%eax,4)
; |
; +-------+-------+--
; | mm1 |
; +-------+-------+--
; 0mod8 4mod8
;
; dest
; -4(edx,%eax,4)
; |
; +-------+-------+-------+--
; | xxx | |
; +-------+-------+-------+--
; 0mod8 4mod8 0mod8
movq mm0,mm1
add ecx,32 ; new shift
psllq mm0,mm6
movd mm6,ecx
psrlq mm0,32
; wasted cycle here waiting for %mm0
movd [-4+edx+eax*4],mm0
sub edx,4
; Both pointers aligned.  Builds the first dst qword in mm3, sets mm7 to the
; complementary right shift, and enters the unrolled loop unless fewer than 8
; limbs remain.
Lstart_dst_aligned:
psllq mm1,mm6
neg ecx ; -shift
add ecx,64 ; 64-shift
movq mm2,mm3
movd mm7,ecx
sub eax,8 ; size-8
psrlq mm3,mm7
por mm3,mm1 ; mm3 ready to store
jc Lfinish ; borrow from the sub above: fewer than 8 limbs
; The comments in mpn_rshift apply here too.
; eax counter,limbs
; ebx src
; ecx
; edx dst
; esi
; edi
;
; mm0
; mm1
; mm2 src qword from 16(%ebx,%eax,4)
; mm3 dst qword ready to store to 24(%edx,%eax,4)
;
; mm5 return value
; mm6 lshift
; mm7 rshift
align 8
; Unrolled loop: two dst qwords (4 limbs) per pass, working downwards.  mm3
; always holds a finished qword waiting to be stored; mm2 holds the src qword
; whose low bits are still needed.
Lunroll_loop:
movq mm0,[ebx+eax*4+8]
psllq mm2,mm6
movq mm1,mm0
psrlq mm0,mm7
movq [24+edx+eax*4],mm3 ; store qword finished on the previous step
por mm0,mm2
movq mm3,[ebx+eax*4]
psllq mm1,mm6
movq [16+edx+eax*4],mm0
movq mm2,mm3
psrlq mm3,mm7
sub eax,4
por mm3,mm1
jnc Lunroll_loop ; loop until the sub borrows
; Loop tail: if bit 1 of the counter is set, two more limbs (one qword)
; remain, so perform half of a loop pass.
Lfinish:
; eax -4 to -1 representing respectively 0 to 3 limbs remaining
test al,2
jz Lfinish_no_two
movq mm0,[ebx+eax*4+8]
psllq mm2,mm6
movq mm1,mm0
psrlq mm0,mm7
movq [24+edx+eax*4],mm3 ; prev
por mm0,mm2
movq mm2,mm1
movq mm3,mm0
sub eax,2
; Final dispatch: bit 0 of the counter says whether one odd src limb is left.
; The return value is moved to eax here.  The code below the diagrams handles
; the "one extra limb" case; Lfinish_zero handles the other.
Lfinish_no_two:
; eax -4 or -3 representing respectively 0 or 1 limbs remaining
; mm2 src prev qword,from 16(%ebx,%eax,4)
; mm3 dst qword,for 24(%edx,%eax,4)
test al,1
movd eax,mm5 ; retval
pop edi
jz Lfinish_zero ; flags are from the test; movd/pop do not change them
; One extra src limb,destination was aligned.
;
; source ebx
; --+---------------+-------+
; | mm2 | |
; --+---------------+-------+
;
; dest edx+12 edx+4 edx
; --+---------------+---------------+-------+
; | mm3 | | |
; --+---------------+---------------+-------+
;
; mm6 = shift
; mm7 = ecx = 64-shift
; One extra src limb,destination was unaligned.
;
; source ebx
; --+---------------+-------+
; | mm2 | |
; --+---------------+-------+
;
; dest edx+12 edx+4
; --+---------------+---------------+
; | mm3 | |
; --+---------------+---------------+
;
; mm6 = shift+32
; mm7 = ecx = 64-(shift+32)
; In both cases there's one extra limb of src to fetch and combine
; with mm2 to make a qword at 4(%edx),and in the aligned case
; there's an extra limb of dst to be formed from that extra src limb
; left shifted.
movd mm0,[ebx]
psllq mm2,mm6
movq [12+edx],mm3
psllq mm0,32
movq mm1,mm0
psrlq mm0,mm7
por mm0,mm2
psllq mm1,mm6
movq [4+edx],mm0
psrlq mm1,32
and ecx,32 ; bit 5 of 64-shift tells the aligned and unaligned cases apart
pop ebx
jz Lfinish_one_unaligned
movd [edx],mm1 ; aligned case: extra low dst limb
Lfinish_one_unaligned:
emms
ret
; No odd src limb left.  Stores the pending qword in mm3 and then the lowest
; one or two dst limbs formed from mm2.
Lfinish_zero:
; No extra src limbs,destination was aligned.
;
; source ebx
; --+---------------+
; | mm2 |
; --+---------------+
;
; dest edx+8 edx
; --+---------------+---------------+
; | mm3 | |
; --+---------------+---------------+
;
; mm6 = shift
; mm7 = ecx = 64-shift
; No extra src limbs,destination was unaligned.
;
; source ebx
; --+---------------+
; | mm2 |
; --+---------------+
;
; dest edx+8 edx+4
; --+---------------+-------+
; | mm3 | |
; --+---------------+-------+
;
; mm6 = shift+32
; mm7 = ecx = 64-(shift+32)
; The movd for the unaligned case writes the same data to 4(%edx)
; that the movq does for the aligned case.
movq [8+edx],mm3
and ecx,32 ; bit 5 of 64-shift tells the aligned and unaligned cases apart
psllq mm2,mm6
jz Lfinish_zero_unaligned ; flags are from the and; psllq does not change them
movq [edx],mm2
Lfinish_zero_unaligned:
psrlq mm2,32
pop ebx
movd eax,mm5 ; retval
movd [4+edx],mm2
emms
ret
end

View file

@ -1,179 +0,0 @@
; Copyright 2000, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\..\x86i.inc"
; MMX registers holding the bit masks used by the counting steps in ph_fun.
%define REG_AAAAAAAAAAAAAAAA mm7
%define REG_3333333333333333 mm6
%define REG_0F0F0F0F0F0F0F0F mm5
%define REG_000000FF000000FF mm4
; Non-PIC builds load the masks from these 8-byte constants; PIC builds
; construct them in registers inside ph_fun instead.
%ifndef PIC
section .data
align 8
Lrodata_AAAAAAAAAAAAAAAA:
dd 0AAAAAAAAh
dd 0AAAAAAAAh
Lrodata_3333333333333333:
dd 033333333h
dd 033333333h
Lrodata_0F0F0F0F0F0F0F0F:
dd 00F0F0F0Fh
dd 00F0F0F0Fh
Lrodata_000000FF000000FF:
dd 0000000FFh
dd 0000000FFh
%endif
; ph_fun prefix, name, is_hamdist
;
; Emits one bit-counting function whose global symbol is %1 and %2 joined
; together.  With %3 == 0 it counts the set bits of {PARAM_SRC, PARAM_SIZE};
; with %3 == 1 each qword is first XORed with the matching qword from
; PARAM_SRC2, so the differing bits are counted.  The total is returned in
; eax.  PARAM_* must be defined before the macro is invoked.
;
; Two limbs (one qword) are processed per pass; an odd size is handled by
; loading the single top limb first and entering the loop at %%Lloaded.
%macro ph_fun 3
section .text
align 32
global %1%2
%ifdef DLL
export %1%2
%endif
%if %3 == 0
%ifdef PIC
nop ; avoid shrl crossing a 32-byte boundary
%endif
%endif
%1%2:
mov ecx, [PARAM_SIZE]
%ifdef PIC
; build the four masks in registers and duplicate each into both dwords
mov eax, 0xAAAAAAAA
mov edx, 0x33333333
movd mm7,eax
movd mm6,edx
mov eax, 0x0F0F0F0F
mov edx, 0x000000FF
punpckldq mm7,mm7
punpckldq mm6,mm6
movd mm5,eax
movd mm4,edx
punpckldq mm5,mm5
punpckldq mm4,mm4
%else
movq mm7,[Lrodata_AAAAAAAAAAAAAAAA]
movq mm6,[Lrodata_3333333333333333]
movq mm5,[Lrodata_0F0F0F0F0F0F0F0F]
movq mm4,[Lrodata_000000FF000000FF]
%endif
mov eax,[PARAM_SRC]
%if %3 == 1
mov edx,[PARAM_SRC2]
%endif
pxor mm2,mm2 ; running total = 0
shr ecx,1 ; qword count; carry set if size is odd
jnc %%Ltop
movd mm1,[eax+ecx*8] ; Zdisp( movd,0,(%eax,%ecx,8),%mm1)
%if %3 == 1
movd mm0,[edx+ecx*8] ; Zdisp( movd,0,(%edx,%ecx,8),%mm0)
pxor mm1,mm0
%endif
inc ecx ; the odd limb counts as one more pass of the loop
jmp %%Lloaded
align 16
%if %3 == 0
nop
%endif
; eax src
; ebx
; ecx counter,qwords,decrementing
; edx [hamdist] src2
;
; mm0 (scratch)
; mm1 (scratch)
; mm2 total (low dword)
; mm3
; mm4 \
; mm5 | special constants
; mm6 |
; mm7 /
%%Ltop:
movq mm1,[eax+ecx*8-8]
%if %3 == 1
pxor mm1,[edx+ecx*8-8]
%endif
%%Lloaded:
movq mm0,mm1
pand mm1,REG_AAAAAAAAAAAAAAAA
psrlq mm1,1
%if %3 == 1
nop
%endif
psubd mm0,mm1 ; bit pairs
%if %3 == 1
nop
%endif
movq mm1,mm0
psrlq mm0,2
pand mm0,REG_3333333333333333
pand mm1,REG_3333333333333333
paddd mm0,mm1 ; nibbles
movq mm1,mm0
psrlq mm0,4
pand mm0,REG_0F0F0F0F0F0F0F0F
pand mm1,REG_0F0F0F0F0F0F0F0F
paddd mm0,mm1 ; bytes
movq mm1,mm0
psrlq mm0,8
paddb mm0,mm1 ; words
movq mm1,mm0
psrlq mm0,16
paddd mm0,mm1 ; dwords
pand mm0,REG_000000FF000000FF
paddd mm2,mm0 ; low to total
psrlq mm0,32
paddd mm2,mm0 ; high to total
loop %%Ltop ; dec ecx, repeat while non-zero
movd eax,mm2 ; return the total
emms
ret
%endmacro
; ___gmpn_popcount: arguments are (src, size); nothing is pushed, so frame is 0.
%define PARAM_SIZE esp+frame+8
%define PARAM_SRC esp+frame+4
%define frame 0
ph_fun ___g,mpn_popcount,0
; ___gmpn_hamdist: arguments are (src, src2, size).
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC2 esp+frame+8
%define PARAM_SRC esp+frame+4
%define frame 0
ph_fun ___g,mpn_hamdist,1
end

View file

@ -1,362 +0,0 @@
; Copyright 2001 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\..\x86i.inc"
global ___gmpn_rshift
%ifdef DLL
export ___gmpn_rshift
%endif
; Stack arguments.  `frame` is 8 because the entry code pushes ebx and edi
; before any PARAM_* is read.
%define PARAM_SHIFT esp+frame+16
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
%define frame 8
; Minimum 5,because the unrolled loop can't handle less.
%define UNROLL_THRESHOLD 5
section .text
align 8
; mpn_rshift entry: shift {src,size} right by `shift` bits into dst and
; return in eax the bits shifted out of the low limb.  Sizes of
; UNROLL_THRESHOLD or more go to Lunroll, size 1 is handled inline, the rest
; go to Lsimple.
___gmpn_rshift:
push ebx
push edi
mov eax,[PARAM_SIZE]
mov edx,[PARAM_DST]
mov ebx,[PARAM_SRC]
mov ecx,[PARAM_SHIFT]
cmp eax,UNROLL_THRESHOLD
jae Lunroll
dec eax
mov edi,[ebx] ; src low limb
jnz Lsimple ; flags are from the dec; mov does not change them
shrd eax,edi,cl ; eax is zero here, so this yields the bits shifted out
shr edi,cl
mov [edx],edi ; dst low limb
pop edi ; risk of data cache bank clash
pop ebx
ret
; eax size-1
; ebx src
; ecx shift
; edx dst
; esi
; edi
; ebp
align 8
; Simple loop for 2 to UNROLL_THRESHOLD-1 limbs.  Works from the low end up:
; each step loads a qword spanning two adjacent src limbs, shifts it right and
; stores the low dword as one dst limb.  The high limb is done after the loop.
Lsimple:
movd mm5,[ebx] ; src[0]
lea ebx,[ebx+eax*4] ; &src[size-1]
movd mm6,ecx ; rshift
lea edx,[-4+edx+eax*4] ; &dst[size-2]
psllq mm5,32
neg eax
; This loop is 5 or 8 cycles,with every second load unaligned and a wasted
; cycle waiting for the mm0 result to be ready. For comparison a shrdl is 4
; cycles and would be 8 in a simple loop. Using mmx helps the return value
; and last limb calculations too.
; eax counter,limbs,negative
; ebx &src[size-1]
; ecx return value
; edx &dst[size-2]
;
; mm0 scratch
; mm5 return value
; mm6 shift
Lsimple_top:
movq mm0,[ebx+eax*4]
inc eax
psrlq mm0,mm6
movd [edx+eax*4],mm0
jnz Lsimple_top ; flags are from the inc; MMX ops do not change them
movd mm0,[ebx]
psrlq mm5,mm6 ; return value
psrlq mm0,mm6
pop edi
movd eax,mm5
pop ebx
movd [4+edx],mm0 ; dst high limb
emms
ret
; eax size
; ebx src
; ecx shift
; edx dst
; esi
; edi
; ebp
align 8
; Unrolled path for size >= UNROLL_THRESHOLD.  Captures src[0] for the return
; value, then, if src is not 8-byte aligned, handles the low limb on its own
; and steps src and dst up by one limb.
Lunroll:
movd mm5,[ebx] ; src[0]
mov edi,4
movd mm6,ecx ; rshift
test ebx,edi ; ZF set if src is 0 mod 8
psllq mm5,32
jz Lstart_src_aligned ; flags are from the test; psllq does not change them
; src isn't aligned,process low limb separately (marked xxx) and
; step src and dst by one limb,making src aligned.
;
; source ebx
; --+-------+-------+-------+
; | xxx |
; --+-------+-------+-------+
; 4mod8 0mod8 4mod8
;
; dest edx
; --+-------+-------+
; | | xxx |
; --+-------+-------+
movq mm0,[ebx] ; unaligned load
psrlq mm0,mm6
add ebx,4
dec eax
movd [edx],mm0
add edx,4
; src is now aligned.  Finishes the return value and, if dst is not 8-byte
; aligned, stores the low dst limb separately, steps dst up by 4 and adds 32
; to the shift so later qword stores are aligned.
Lstart_src_aligned:
movq mm1,[ebx]
test edx,edi ; edi is still 4: ZF set if dst is 0 mod 8
psrlq mm5,mm6 ; retval
jz Lstart_dst_aligned
; dst isn't aligned,add 4 to make it so,and pretend the shift is
; 32 bits extra. Low limb of dst (marked xxx) handled here
; separately.
;
; source ebx
; --+-------+-------+
; | mm1 |
; --+-------+-------+
; 4mod8 0mod8
;
; dest edx
; --+-------+-------+-------+
; | xxx |
; --+-------+-------+-------+
; 4mod8 0mod8 4mod8
movq mm0,mm1
add ecx,32 ; new shift
psrlq mm0,mm6
movd mm6,ecx
movd [edx],mm0
add edx,4
; Both pointers aligned.  Builds the first dst qword in mm3, sets mm7 to the
; complementary left shift, re-bases src/dst for a negative up-counting index
; and enters the unrolled loop unless too few limbs remain.
Lstart_dst_aligned:
movq mm3,[8+ebx]
neg ecx
movq mm2,mm3 ; mm2 src qword
add ecx,64 ; 64-shift
movd mm7,ecx
psrlq mm1,mm6
lea ebx,[-12+ebx+eax*4]
lea edx,[-20+edx+eax*4]
psllq mm3,mm7
sub eax,7 ; size-7
por mm3,mm1 ; mm3 ready to store
neg eax ; -(size-7)
jns Lfinish ; counter already non-negative: skip the loop
; This loop is the important bit,the rest is just support. Careful
; instruction scheduling achieves the claimed 1.75 c/l. The
; relevant parts of the pairing rules are:
;
; - mmx loads and stores execute only in the U pipe
; - only one mmx shift in a pair
; - wait one cycle before storing an mmx register result
; - the usual address generation interlock
;
; Two qword calculations are slightly interleaved. The instructions
; marked "C" belong to the second qword,and the "C prev" one is for
; the second qword from the previous iteration.
; eax counter,limbs,negative
; ebx &src[size-12]
; ecx
; edx &dst[size-12]
; esi
; edi
;
; mm0
; mm1
; mm2 src qword from -8(%ebx,%eax,4)
; mm3 dst qword ready to store to -8(%edx,%eax,4)
;
; mm5 return value
; mm6 rshift
; mm7 lshift
align 8
; Unrolled loop: two dst qwords (4 limbs) per pass, working upwards.  mm3
; always holds a finished qword waiting to be stored; mm2 holds the src qword
; whose high bits are still needed.
Lunroll_loop:
movq mm0,[ebx+eax*4]
psrlq mm2,mm6
movq mm1,mm0
psllq mm0,mm7
movq [-8+edx+eax*4],mm3 ; store qword finished on the previous step
por mm0,mm2
movq mm3,[ebx+eax*4+8]
psrlq mm1,mm6
movq [edx+eax*4],mm0
movq mm2,mm3
psllq mm3,mm7
add eax,4
por mm3,mm1
js Lunroll_loop ; loop while the counter is still negative
; Loop tail: if bit 1 of the counter is clear, two more limbs (one qword)
; remain, so perform half of a loop pass.
Lfinish:
; eax 0 to 3 representing respectively 3 to 0 limbs remaining
test al,2
jnz Lfinish_no_two
movq mm0,[ebx+eax*4]
psrlq mm2,mm6
movq mm1,mm0
psllq mm0,mm7
movq [-8+edx+eax*4],mm3 ; prev
por mm0,mm2
movq mm2,mm1
movq mm3,mm0
add eax,2
; Final dispatch: bit 0 of the counter says whether one odd src limb is left.
; The return value is moved to eax here.  The code below the diagrams handles
; the "one extra limb" case; Lfinish_zero handles the other.
Lfinish_no_two:
; eax 2 or 3 representing respectively 1 or 0 limbs remaining
;
; mm2 src prev qword,from -8(%ebx,%eax,4)
; mm3 dst qword,for -8(%edx,%eax,4)
test al,1
pop edi
movd eax,mm5 ; retval
jnz Lfinish_zero ; flags are from the test; pop/movd do not change them
; One extra limb,destination was aligned.
;
; source ebx
; +-------+---------------+--
; | | mm2 |
; +-------+---------------+--
;
; dest edx
; +-------+---------------+---------------+--
; | | | mm3 |
; +-------+---------------+---------------+--
;
; mm6 = shift
; mm7 = ecx = 64-shift
; One extra limb,destination was unaligned.
;
; source ebx
; +-------+---------------+--
; | | mm2 |
; +-------+---------------+--
;
; dest edx
; +---------------+---------------+--
; | | mm3 |
; +---------------+---------------+--
;
; mm6 = shift+32
; mm7 = ecx = 64-(shift+32)
; In both cases there's one extra limb of src to fetch and combine
; with mm2 to make a qword at 8(%edx),and in the aligned case
; there's a further extra limb of dst to be formed.
movd mm0,[8+ebx]
psrlq mm2,mm6
movq mm1,mm0
psllq mm0,mm7
movq [edx],mm3
por mm0,mm2
psrlq mm1,mm6
and ecx,32 ; bit 5 of 64-shift tells the aligned and unaligned cases apart
pop ebx
jz Lfinish_one_unaligned
; dst was aligned,must store one extra limb
movd [16+edx],mm1
Lfinish_one_unaligned:
movq [8+edx],mm0
emms
ret
; No odd src limb left.  Stores the pending qword in mm3 and then the highest
; one or two dst limbs formed from mm2.
Lfinish_zero:
; No extra limbs,destination was aligned.
;
; source ebx
; +---------------+--
; | mm2 |
; +---------------+--
;
; dest edx+4
; +---------------+---------------+--
; | | mm3 |
; +---------------+---------------+--
;
; mm6 = shift
; mm7 = ecx = 64-shift
; No extra limbs,destination was unaligned.
;
; source ebx
; +---------------+--
; | mm2 |
; +---------------+--
;
; dest edx+4
; +-------+---------------+--
; | | mm3 |
; +-------+---------------+--
;
; mm6 = shift+32
; mm7 = 64-(shift+32)
; The movd for the unaligned case is clearly the same data as the
; movq for the aligned case,it's just a choice between whether one
; or two limbs should be written.
movq [4+edx],mm3
psrlq mm2,mm6
movd [12+edx],mm2
and ecx,32 ; bit 5 of 64-shift tells the aligned and unaligned cases apart
pop ebx
jz Lfinish_zero_unaligned
movq [12+edx],mm2 ; aligned case: write both limbs
Lfinish_zero_unaligned:
emms
ret
end

View file

@ -1,309 +0,0 @@
; Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\x86i.inc"
global ___gmpn_preinv_mod_1
global ___gmpn_mod_1c
global ___gmpn_mod_1
%ifdef DLL
export ___gmpn_mod_1c
export ___gmpn_mod_1
%endif
; Limb counts at or above which the multiply-by-inverse loop is used: one
; threshold for a divisor whose top bit is already set, one otherwise.
; MUL_NORM_DELTA is the difference between the two; the entry code ANDs it
; with a sign mask of the divisor and adds it to the unnormalized threshold.
%define MUL_NORM_THRESHOLD 4
%define MUL_UNNORM_THRESHOLD 5
%define MUL_NORM_DELTA MUL_NORM_THRESHOLD - MUL_UNNORM_THRESHOLD
; Stack arguments.  PARAM_INVERSE (preinv_mod_1) and PARAM_CARRY (mod_1c)
; share offset 16 because they are read by different entry points.
%define PARAM_INVERSE esp+frame+16
%define PARAM_CARRY esp+frame+16
%define PARAM_DIVISOR esp+frame+12
%define PARAM_SIZE esp+frame+8
%define PARAM_SRC esp+frame+4
; Register save slots and locals inside the STACK_SPACE bytes reserved by
; each entry point.
%define SAVE_EBX esp+frame-4
%define SAVE_ESI esp+frame-8
%define SAVE_EDI esp+frame-12
%define SAVE_EBP esp+frame-16
%define VAR_NORM esp+frame-20
%define VAR_INVERSE esp+frame-24
%define STACK_SPACE 24
section .text
align 16
%define frame 0
; mpn_preinv_mod_1 entry: remainder of {src,size} by the divisor, using the
; caller-supplied inverse.  VAR_NORM is set to 0, so no normalization shift is
; applied on this path.  The high limb is reduced by one conditional
; subtraction; with size==1 that is already the result.
___gmpn_preinv_mod_1:
mov edx,[PARAM_SRC]
sub esp,STACK_SPACE
; FR_sesp is defined in x86i.inc (not visible here); presumably it adjusts
; `frame` for the sub above -- TODO confirm.
FR_sesp STACK_SPACE
mov [SAVE_EBX],ebx
mov ebx,[PARAM_SIZE]
mov [SAVE_EBP],ebp
mov ebp,[PARAM_DIVISOR]
mov [SAVE_ESI],esi
mov eax,[PARAM_INVERSE]
mov [SAVE_EDI],edi
mov edi,[-4+edx+ebx*4] ; src high limb
mov [VAR_NORM],dword 0
lea ecx,[-8+edx+ebx*4] ; &src[size-2]
mov esi,edi
sub edi,ebp ; high-divisor
cmovc edi,esi ; restore the high limb if the subtraction borrowed
dec ebx
jnz Lpreinv_entry
jmp Ldone_edi
align 16
%define frame 0
; mpn_mod_1c entry: remainder of {src,size} by the divisor, starting from the
; caller's PARAM_CARRY.  Returns the carry unchanged when size==0.  Small
; sizes use the plain-div loop at Ldivide_top; larger ones treat the carry as
; an extra high limb and join the inverse code at Lmul_by_inverse_1c.
___gmpn_mod_1c:
mov ecx,[PARAM_SIZE]
sub esp,STACK_SPACE
; FR_sesp is defined in x86i.inc (not visible here); presumably it adjusts
; `frame` for the sub above -- TODO confirm.
FR_sesp STACK_SPACE
mov [SAVE_EBP],ebp
mov eax,[PARAM_DIVISOR]
mov [SAVE_ESI],esi
mov edx,[PARAM_CARRY]
mov esi,[PARAM_SRC]
or ecx,ecx
jz Ldone_edx ; result==carry if size==0
sar eax,31 ; all ones if the divisor's top bit is set, else 0
mov ebp,[PARAM_DIVISOR]
and eax,MUL_NORM_DELTA
add eax,MUL_UNNORM_THRESHOLD ; threshold chosen by divisor top bit
cmp ecx,eax
jb Ldivide_top
; The carry parameter pretends to be the src high limb.
mov [SAVE_EBX],ebx
lea ebx,[1+ecx] ; size+1
mov eax,edx ; carry
jmp Lmul_by_inverse_1c
align 16
%define frame 0
; mpn_mod_1 entry: remainder of {src,size} by the divisor.  Returns 0 when
; size==0.  When the high limb is below the divisor it becomes the initial
; remainder and one division is skipped; if that leaves no limbs, the high
; limb is returned directly.
___gmpn_mod_1:
mov ecx,[PARAM_SIZE]
sub esp,STACK_SPACE
; FR_sesp is defined in x86i.inc (not visible here); presumably it adjusts
; `frame` for the sub above -- TODO confirm.
FR_sesp STACK_SPACE
mov edx,0 ; initial carry (if can't skip a div)
mov [SAVE_ESI],esi
mov eax,[PARAM_SRC]
mov [SAVE_EBP],ebp
mov ebp,[PARAM_DIVISOR]
mov esi,[PARAM_DIVISOR]
or ecx,ecx
jz Ldone_edx
mov eax,[-4+eax+ecx*4] ; src high limb
sar ebp,31 ; all ones if the divisor's top bit is set, else 0
and ebp,MUL_NORM_DELTA
add ebp,MUL_UNNORM_THRESHOLD ; threshold chosen by divisor top bit
cmp eax,esi ; carry flag if high<divisor
cmovc edx,eax ; high limb becomes the initial remainder
mov esi,[PARAM_SRC]
sbb ecx,0 ; size-1 to skip one div
jz Ldone_eax ; done if had size==1
cmp ecx,ebp
mov ebp,[PARAM_DIVISOR]
jae Lmul_by_inverse ; flags are from the cmp; mov does not change them
; eax scratch (quotient)
; ebx
; ecx counter,limbs,decrementing
; edx scratch (remainder)
; esi src
; edi
; ebp divisor
; Plain-div loop for small sizes: each `div` takes remainder:limb and leaves
; the new remainder in edx.  Only esi and ebp were saved on the paths that
; reach the exits below, so only those are restored.
Ldivide_top:
mov eax,[-4+esi+ecx*4]
div ebp ; edx:eax / divisor, remainder stays in edx
dec ecx
jnz Ldivide_top
Ldone_edx:
mov eax,edx ; return the remainder
Ldone_eax:
mov esi,[SAVE_ESI]
mov ebp,[SAVE_EBP]
add esp,STACK_SPACE
ret
; eax src high limb
; ebx
; ecx
; edx
; esi src
; edi
; ebp divisor
; Setup for the multiply-by-inverse loop: save ebx/edi, shift the divisor
; left by its leading-zero count l, reduce the high limb by one conditional
; subtraction of the shifted divisor, and compute the inverse with a single
; `div`.  mod_1c joins at Lmul_by_inverse_1c with ebx = size+1 and eax = carry.
Lmul_by_inverse:
mov [SAVE_EBX],ebx
mov ebx,[PARAM_SIZE]
Lmul_by_inverse_1c:
bsr ecx,ebp ; 31-l
mov [SAVE_EDI],edi
xor ecx,31 ; l
mov [VAR_NORM],ecx
shl ebp,cl ; d normalized
mov edi,eax ; src high -> n2
sub eax,ebp
cmovnc edi,eax ; use high-d when that did not borrow
mov eax,-1
mov edx,-1
sub edx,ebp ; (b-d)-1 so edx:eax = b*(b-d)-1
lea ecx,[-8+esi+ebx*4] ; &src[size-2]
div ebp ; floor (b*(b-d)-1) / d
; Main remainder loop.  Saves the inverse, then processes one src limb per
; pass from ecx downwards until ecx drops below PARAM_SRC, keeping the running
; remainder in edi.  The q1+1 == 0 overflow case is handled at Lq1_ff.
Lpreinv_entry:
mov [VAR_INVERSE],eax
; No special scheduling of loads is necessary in this loop,out of order
; execution hides the latencies already.
;
; The way q1+1 is generated in %ebx and d is moved to %eax for the multiply
; seems fastest. The obvious change to generate q1+1 in %eax and then just
; multiply by %ebp (as per mpn/x86/pentium/mod_1.asm in fact) runs 1 cycle
; slower,for no obvious reason.
; eax n10 (then scratch)
; ebx scratch (nadj,q1)
; ecx src pointer,decrementing
; edx scratch
; esi n10
; edi n2
; ebp divisor
align 16
Linverse_top:
mov eax,[ecx] ; next src limb
mov esi,eax
sar eax,31 ; -n1
mov ebx,ebp
and ebx,eax ; -n1 & d
neg eax ; n1
add eax,edi ; n2+n1
mul dword [VAR_INVERSE] ; m*(n2+n1)
add ebx,esi ; nadj = n10 + (-n1 & d),ignoring overflow
sub ecx,4
add eax,ebx ; m*(n2+n1) + nadj,low giving carry flag
lea ebx,[1+edi] ; n2+1
mov eax,ebp ; d
adc ebx,edx ; 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
jz Lq1_ff
mul ebx ; (q1+1)*d
sub esi,eax ; low n - (q1+1)*d
sbb edi,edx ; high n - (q1+1)*d,0 or -1
and edi,ebp ; d if underflow
add edi,esi ; remainder with addback if necessary
cmp ecx,[PARAM_SRC]
jae Linverse_top
; %edi is the remainder modulo d*2^n and now must be reduced to
; 0<=r<d by calculating r*2^n mod d*2^n and then right shifting by
; n. If d was already normalized on entry so that n==0 then nothing
; is needed here. The chance of n==0 is low,but it's true of say
; PP from gmp-impl.h.
;
; eax
; ebx
; ecx
; edx
; esi
; edi remainder
; ebp divisor (normalized)
; After the loop: if the divisor was shifted (VAR_NORM != 0), do one more
; quotient step on remainder << l and shift the result right by l to get the
; true remainder; otherwise edi is already the answer (Ldone_edi).
Linverse_loop_done:
mov ecx,[VAR_NORM]
mov esi,0
or ecx,ecx
jz Ldone_edi
; Here use %edi=n10 and %esi=n2,opposite to the loop above.
;
; The q1=0xFFFFFFFF case is handled with an sbbl to adjust q1+1
; back,rather than q1_ff special case code. This is simpler and
; costs only 2 uops.
shld esi,edi,cl
shl edi,cl
mov eax,edi ; n10
mov ebx,ebp ; d
sar eax,31 ; -n1
and ebx,eax ; -n1 & d
neg eax ; n1
add ebx,edi ; nadj = n10 + (-n1 & d),ignoring overflow
add eax,esi ; n2+n1
mul dword [VAR_INVERSE] ; m*(n2+n1)
add eax,ebx ; m*(n2+n1) + nadj,low giving carry flag
lea ebx,[1+esi] ; n2+1
adc ebx,edx ; 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
sbb ebx,0 ; q1 if q1+1 overflowed
mov eax,ebp ; d
mul ebx ; (q1+1)*d
mov ebx,[SAVE_EBX]
sub edi,eax ; low n - (q1+1)*d is remainder
sbb esi,edx ; high n - (q1+1)*d,0 or -1
and esi,ebp ; d if underflow
mov ebp,[SAVE_EBP]
lea eax,[esi+edi] ; remainder
mov esi,[SAVE_ESI]
shr eax,cl ; denorm remainder
mov edi,[SAVE_EDI]
add esp,STACK_SPACE
ret
; Exit used when edi already holds the final remainder: restore all four
; saved registers and return edi in eax.
Ldone_edi:
mov ebx,[SAVE_EBX]
mov eax,edi
mov esi,[SAVE_ESI]
mov edi,[SAVE_EDI]
mov ebp,[SAVE_EBP]
add esp,STACK_SPACE
ret
; Special case for q1=0xFFFFFFFF,giving q=0xFFFFFFFF meaning the low dword
; of q*d is simply -d and the remainder n-q*d = n10+d.
;
; This is reached only very rarely.
;
; eax (divisor)
; ebx (q1+1 == 0)
; ecx src pointer
; edx
; esi n10
; edi (n2)
; ebp divisor
; Reached from Linverse_top when q1+1 wrapped to zero: the new remainder is
; n10+d.  Re-enters the loop if src limbs remain (ecx was already stepped).
Lq1_ff:
lea edi,[ebp+esi] ; n-q*d remainder -> next n2
cmp ecx,[PARAM_SRC]
jae Linverse_top
jmp Linverse_loop_done
end

View file

@ -1,137 +0,0 @@
; Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
; Shared include; FR_sesp, used in the function body below, is not defined in
; this file and is presumably supplied here - TODO confirm.
%include "..\x86i.inc"
; Lookup table giving an 8-bit inverse, indexed by (d/2)&127 in the code below.
extern ___gmp_modlimb_invert_table
global ___gmpn_modexact_1c_odd
global ___gmpn_modexact_1_odd
%ifdef DLL
export ___gmpn_modexact_1c_odd
export ___gmpn_modexact_1_odd
%endif
; Stack arguments, addressed relative to esp plus the current frame adjustment.
; PARAM_CARRY is only read by the 1c entry point.
%define PARAM_CARRY esp+frame+16
%define PARAM_DIVISOR esp+frame+12
%define PARAM_SIZE esp+frame+8
%define PARAM_SRC esp+frame+4
; Not enough room under modexact_1 to make these re-use the parameter
; space,unfortunately.
; Save slots for the registers this routine overwrites (ebx, esi, edi).
%define SAVE_EBX esp+frame-4
%define SAVE_ESI esp+frame-8
%define SAVE_EDI esp+frame-12
; Bytes reserved on the stack for the three save slots above.
%define STACK_SPACE 12
; frame starts at 0: nothing has been pushed when the entry points are reached.
%define frame 0
; mpn_modexact_1c_odd / mpn_modexact_1_odd.
;
; Two entry points share one body. The 1c form loads its starting carry from
; PARAM_CARRY; the plain form starts with a zero carry. The divisor inverse is
; built from an 8-bit table entry refined by two "2*inv - inv*inv*d" steps.
; The result returned in eax is ecx+edx as left by the last loop iteration.
; ebx, esi and edi are saved on entry and restored before returning.
section .text
align 16
___gmpn_modexact_1c_odd:
mov ecx,[PARAM_CARRY]
jmp Lstart_1c
align 16
___gmpn_modexact_1_odd:
xor ecx,ecx
Lstart_1c:
mov eax,[PARAM_DIVISOR]
sub esp,STACK_SPACE
; presumably records the esp adjustment in `frame` so the PARAM_/SAVE_ offsets
; stay valid - macro comes from the include file, TODO confirm
FR_sesp STACK_SPACE
mov [SAVE_ESI],esi
mov esi,[PARAM_SRC]
shr eax,1 ; d/2
mov [SAVE_EDI],edi
; table index is the low 7 bits of d/2
and eax,127
%ifdef PIC
; position-independent variant: fetch the return address into edi and reach
; the table through the global offset table
call Lmovl_eip_edi
add edi,_GLOBAL_OFFSET_TABLE_
mov edi,[___gmp_modlimb_invert_table+edi]
movzx edi,byte [eax+edi] ; inv 8 bits
%else
movzx edi,byte [___gmp_modlimb_invert_table+eax] ; inv 8 bits
%endif
xor edx,edx ; initial extra carry
; first refinement step, result in eax
lea eax,[edi+edi] ; 2*inv
imul edi,edi ; inv*inv
mov [SAVE_EBX],ebx
mov ebx,[PARAM_SIZE]
imul edi,[PARAM_DIVISOR] ; inv*inv*d
sub eax,edi ; inv = 2*inv - inv*inv*d
; second refinement step, result in edi
lea edi,[eax+eax] ; 2*inv
imul eax,eax ; inv*inv
imul eax,[PARAM_DIVISOR] ; inv*inv*d
; esi becomes the end pointer and ebx a negative index counting up to zero
lea esi,[esi+ebx*4] ; src end
neg ebx ; -size
sub edi,eax ; inv = 2*inv - inv*inv*d
; The dependent chain here is
;
; subl %edx,%eax 1
; imull %edi,%eax 4
; mull PARAM_DIVISOR 5
; ----
; total 10
;
; and this is the measured speed. No special scheduling is necessary,out
; of order execution hides the load latency.
;
; eax scratch (src limb)
; ebx counter,limbs,negative
; ecx carry bit,0 or 1
; edx carry limb,high of last product
; esi &src[size]
; edi inverse
; ebp
Ltop:
mov eax,[esi+ebx*4]
; subtract the carry bit, then the carry limb; ecx accumulates the borrows
; as 0 or -1 and is negated back to 0 or 1 below
sub eax,ecx
sbb ecx,ecx
sub eax,edx
sbb ecx,0
; multiply by the inverse, then by the divisor; edx receives the high half
imul eax,edi
neg ecx
mul dword [PARAM_DIVISOR]
inc ebx
jnz Ltop
; restore saved registers; the return value is carry bit plus carry limb
mov esi,[SAVE_ESI]
lea eax,[ecx+edx]
mov edi,[SAVE_EDI]
mov ebx,[SAVE_EBX]
add esp,STACK_SPACE
ret
%ifdef PIC
; helper for the PIC path: returns its own return address in edi
Lmovl_eip_edi:
mov edi,[esp]
ret
%endif
end

View file

@ -1,147 +0,0 @@
; Copyright 2000, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
; Symbolic names for the MMX registers holding the bit-mask constants used by
; the counting steps in ph_fun, plus the all-zero register.
%define REG_AAAAAAAAAAAAAAAA mm7
%define REG_3333333333333333 mm6
%define REG_0F0F0F0F0F0F0F0F mm5
%define REG_0000000000000000 mm4
; Non-PIC builds load the 64-bit masks from memory; each is stored as two
; identical dwords. PIC builds construct them in registers instead.
%ifndef PIC
section .data
align 8
Lrodata_AAAAAAAAAAAAAAAA:
dd 0AAAAAAAAh
dd 0AAAAAAAAh
Lrodata_3333333333333333:
dd 033333333h
dd 033333333h
Lrodata_0F0F0F0F0F0F0F0F:
dd 00F0F0F0Fh
dd 00F0F0F0Fh
%endif
; ph_fun prefix, name, is_hamdist
;
; Emits one complete function whose label is the first two arguments pasted
; together. When the third argument is 1 the function XORs each source word
; with the matching word from PARAM_SRC2 before counting (hamdist); otherwise
; it counts the bits of PARAM_SRC alone (popcount). The total is returned in
; eax. PARAM_SIZE, PARAM_SRC and (for hamdist) PARAM_SRC2 must be defined
; before the macro is invoked.
%macro ph_fun 3
align 32
global %1%2
%ifdef DLL
export %1%2
%endif
%1%2:
mov ecx,[PARAM_SIZE]
%ifdef PIC
; build the 64-bit masks in registers: load 32 bits, then duplicate into the
; high half with punpckldq
mov eax,0xAAAAAAAA
mov edx,0x33333333
movd mm7,eax
movd mm6,edx
mov eax,0x0F0F0F0F
punpckldq mm7,mm7
punpckldq mm6,mm6
movd mm5,eax
; this write to mm4 is overwritten by the pxor mm4,mm4 after the conditional
movd mm4,edx
punpckldq mm5,mm5
%else
movq mm7,[Lrodata_AAAAAAAAAAAAAAAA]
movq mm6,[Lrodata_3333333333333333]
movq mm5,[Lrodata_0F0F0F0F0F0F0F0F]
%endif
; mm4 = 0, the second operand of psadbw below
pxor mm4,mm4
mov eax,[PARAM_SRC]
%if %3 == 1
mov edx,[PARAM_SRC2]
%endif
; mm2 = running total
pxor mm2,mm2
; ecx = size/2 qwords; the carry flag holds the odd limb, if any
shr ecx,1
jnc %%Ltop
; odd size: handle the single highest limb first
movd mm1,[eax+ecx*8]
%if %3 == 1
movd mm0,[edx+ecx*8]
pxor mm1,mm0
%endif
; set the zero flag from ecx so the jnz at the bottom of the loop sees
; whether any qwords remain (the MMX instructions in between leave flags alone)
or ecx,ecx
jmp %%Lloaded
; eax src
; ebx
; ecx counter,qwords,decrementing
; edx [hamdist] src2
;
; mm0 (scratch)
; mm1 (scratch)
; mm2 total (low dword)
; mm3
; mm4 zero
; mm5 0F0F... mask
; mm6 3333... mask
; mm7 AAAA... mask
align 16
%%Ltop:
movq mm1,[eax+ecx*8-8]
%if %3 == 1
pxor mm1,[edx+ecx*8-8]
%endif
; flags from this dec are consumed by the jnz at the bottom of the loop
dec ecx
%%Lloaded:
movq mm0,mm1
pand mm1,REG_AAAAAAAAAAAAAAAA
psrlq mm1,1
psubd mm0,mm1 ; bit pairs
movq mm1,mm0
psrlq mm0,2
pand mm0,REG_3333333333333333
pand mm1,REG_3333333333333333
paddd mm0,mm1 ; nibbles
movq mm1,mm0
psrlq mm0,4
pand mm0,REG_0F0F0F0F0F0F0F0F
pand mm1,REG_0F0F0F0F0F0F0F0F
paddd mm0,mm1 ; bytes
; sum the eight byte counts into one value (absolute differences against zero)
psadbw mm0,mm4
paddd mm2,mm0 ; add to total
jnz %%Ltop
movd eax,mm2
; leave MMX state before returning
emms
ret
%endmacro
section .text
; popcount instance: two stack arguments (src, size); the pasted label is
; ___gmpn_popcount.
%define PARAM_SIZE esp+frame+8
%define PARAM_SRC esp+frame+4
%define frame 0
ph_fun ___g,mpn_popcount,0
; hamdist instance: three stack arguments (src, src2, size); the pasted label
; is ___gmpn_hamdist. The PARAM_ names are redefined for the new layout.
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC2 esp+frame+8
%define PARAM_SRC esp+frame+4
%define frame 0
ph_fun ___g,mpn_hamdist,1
end

View file

@ -1,488 +0,0 @@
; Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\x86i.inc"
global ___gmpn_sqr_basecase
%ifdef DLL
export ___gmpn_sqr_basecase
%endif
; Number of 15-byte addmul chunks emitted in the unrolled inner loop.
; The assert comment further down notes that this bounds the operand size.
%define UNROLL_COUNT 64 ; seems to be maximum required (I hope!)
; Stack arguments (dst, src, size), relative to esp plus the current frame.
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
; nothing is on the stack at function entry
%define frame 0
; mpn_sqr_basecase entry point: dispatch on size, with the one-limb case
; handled inline.
section .text
align 32
___gmpn_sqr_basecase:
mov edx,[PARAM_SIZE]
mov eax,[PARAM_SRC]
; the flags from this compare drive both the je and the ja below; the
; intervening mov instructions do not alter them
cmp edx,2
mov ecx,[PARAM_DST]
je Ltwo_limbs
; eax changes from the src pointer to the low src limb here
mov eax,[eax]
ja Lthree_or_more
; one limb only
; eax src limb
; ebx
; ecx dst
; edx
; square the single limb and store the two-limb result
mul eax
mov [ecx],eax
mov [4+ecx],edx
ret
; eax src
; ebx
; ecx dst
; edx
; Save slots for the callee-saved registers, relative to the enlarged frame.
%define SAVE_ESI esp+frame-4
%define SAVE_EBX esp+frame-8
%define SAVE_EDI esp+frame-12
%define SAVE_EBP esp+frame-16
%define frame 16
; Two-limb square: computes both squares and the single cross product, adds
; the cross product in twice, and writes four result limbs. Register saves
; and restores are interleaved with the arithmetic.
Ltwo_limbs:
sub esp,frame
mov [SAVE_ESI],esi
mov esi,eax
mov eax,[eax]
mul eax ; src[0]^2
mov [ecx],eax ; dst[0]
mov eax,[4+esi]
mov [SAVE_EBX],ebx
mov ebx,edx ; dst[1]
mul eax ; src[1]^2
mov [SAVE_EDI],edi
mov edi,eax ; dst[2]
mov eax,[esi]
mov [SAVE_EBP],ebp
mov ebp,edx ; dst[3]
mul dword [4+esi] ; src[0]*src[1]
; first addition of the cross product (edx:eax) into dst[1..3]
add ebx,eax
mov esi,[SAVE_ESI]
adc edi,edx
adc ebp,0
; second addition of the same cross product; results end up in eax/edx/ebp
add eax,ebx
mov ebx,[SAVE_EBX]
adc edx,edi
mov edi,[SAVE_EDI]
adc ebp,0
mov [4+ecx],eax
mov [12+ecx],ebp
mov ebp,[SAVE_EBP]
mov [8+ecx],edx
add esp,frame
ret
; eax src low limb
; ebx
; ecx dst
; edx size
; Sizes of three or more: reserve the frame, save esi, and branch to the
; general code for size >= 4. The fall-through path is the three-limb square.
Lthree_or_more:
sub esp,frame
mov [SAVE_ESI],esi
; the flags from this compare are used by the jae after the load
cmp edx,4
mov esi,[PARAM_SRC]
jae Lfour_or_more
; three limbs
; eax src low limb
; ebx
; ecx dst
; edx
; esi src
; edi
; ebp
mov [SAVE_EBP],ebp
mov [SAVE_EDI],edi
; store the three squares directly at dst[0..5]
mul eax ; src[0] ^ 2
mov [ecx],eax
mov [4+ecx],edx
mov eax,[4+esi]
xor ebp,ebp
mul eax ; src[1] ^ 2
mov [8+ecx],eax
mov [12+ecx],edx
mov eax,[8+esi]
mov [SAVE_EBX],ebx
mul eax ; src[2] ^ 2
mov [16+ecx],eax
mov [20+ecx],edx
; accumulate the three cross products in ebx/edi/ebp/edx
mov eax,[esi]
mul dword [4+esi] ; src[0] * src[1]
mov ebx,eax
mov edi,edx
mov eax,[esi]
mul dword [8+esi] ; src[0] * src[2]
add edi,eax
mov ebp,edx
adc ebp,0
mov eax,[4+esi]
mul dword [8+esi] ; src[1] * src[2]
xor esi,esi
add ebp,eax
; eax
; ebx dst[1]
; ecx dst
; edx dst[4]
; esi zero,will be dst[5]
; edi dst[2]
; ebp dst[3]
adc edx,0
; double the cross-product sum; the bit shifted out of the top lands in esi
add ebx,ebx
adc edi,edi
adc ebp,ebp
adc edx,edx
mov eax,[4+ecx]
adc esi,0
; add the doubled cross products to the squares already stored at dst[1..5];
; the carry chain runs through the interleaved mov instructions, which do
; not affect flags
add eax,ebx
mov [4+ecx],eax
mov eax,[8+ecx]
adc eax,edi
mov ebx,[12+ecx]
adc ebx,ebp
mov edi,[16+ecx]
mov [8+ecx],eax
mov ebp,[SAVE_EBP]
mov [12+ecx],ebx
mov ebx,[SAVE_EBX]
adc edi,edx
mov eax,[20+ecx]
mov [16+ecx],edi
mov edi,[SAVE_EDI]
adc eax,esi ; no carry out of this
mov esi,[SAVE_ESI]
mov [20+ecx],eax
add esp,frame
ret
; eax src low limb
; ebx
; ecx
; edx size
; esi src
; edi
; ebp
; First multiply src[0]*src[1..size-1] and store at dst[1..size].
; Two extra stack slots used by the general case, below the register saves.
%define VAR_COUNTER esp+frame-20
%define VAR_JMP esp+frame-24
%define STACK_SPACE 24
; General case, size >= 4. The frame of 16 bytes reserved at Lthree_or_more is
; extended to STACK_SPACE, and `frame` is redefined so that the SAVE_ and
; PARAM_ offsets keep pointing at the same stack locations.
Lfour_or_more:
sub esp,STACK_SPACE-frame
%define frame STACK_SPACE
mov ecx,1
mov [SAVE_EDI],edi
mov edi,[PARAM_DST]
mov [SAVE_EBX],ebx
sub ecx,edx ; -(size-1)
mov [SAVE_EBP],ebp
mov ebx,0 ; initial carry
lea esi,[esi+edx*4] ; &src[size]
mov ebp,eax ; multiplier
lea edi,[-4+edi+edx*4] ; &dst[size-1]
; This loop runs at just over 6 c/l.
; eax scratch
; ebx carry
; ecx counter,limbs,negative,-(size-1) to -1
; edx scratch
; esi &src[size]
; edi &dst[size-1]
; ebp multiplier
; Each iteration multiplies one src limb by the multiplier, adds the incoming
; carry, stores the low word and keeps the high word as the next carry.
Lmul_1:
mov eax,ebp
mul dword [esi+ecx*4]
add eax,ebx
; mov leaves the carry flag from the add intact for the adc
mov ebx,0
adc ebx,edx
mov [4+edi+ecx*4],eax
inc ecx
jnz Lmul_1
; final carry becomes the top limb of this row
mov [4+edi],ebx
; Addmul src[n]*src[n+1..size-1] at dst[2*n-1...],for each n=1..size-2.
;
; The last two addmuls,which are the bottom right corner of the product
; triangle,are left to the end. These are src[size-3]*src[size-2,size-1]
; and src[size-2]*src[size-1]. If size is 4 then it's only these corner
; cases that need to be done.
;
; The unrolled code is the same as mpn_addmul_1(),see that routine for some
; comments.
;
; VAR_COUNTER is the outer loop,running from -(size-4) to -1,inclusive.
;
; VAR_JMP is the computed jump into the unrolled code,stepped by one code
; chunk each outer loop.
;
; This is also hard-coded in the address calculation below.
;
; With &src[size] and &dst[size-1] pointers,the displacements in the
; unrolled code fit in a byte for UNROLL_COUNT values up to 32,but above
; that an offset must be added to them.
;
; eax
; ebx carry
; ecx
; edx
; esi &src[size]
; edi &dst[size-1]
; ebp
; Size in bytes of one unrolled addmul chunk; the entry address arithmetic
; below depends on every chunk being exactly this long.
%define CODE_BYTES_PER_LIMB 15
; Byte offset subtracted from esi/edi so that the chunk displacements fit in
; a signed byte when more than 32 chunks are emitted.
%if UNROLL_COUNT > 32
%define OFFSET 4*(UNROLL_COUNT-32)
%else
%define OFFSET 0
%endif
mov ecx,[PARAM_SIZE]
sub ecx,4
; size 4 needs only the corner products
jz Lcorner
mov edx,ecx
neg ecx
; ecx = -16*(size-4); adding edx = (size-4) below gives -15*(size-4), i.e.
; minus CODE_BYTES_PER_LIMB per limb
shl ecx,4
%if OFFSET != 0
sub esi,OFFSET
%endif
%ifdef PIC
; position-independent form: obtain the address of Lhere from the stack and
; add the link-time distance to the target
call Lhere
Lhere:
add ecx,[esp]
add ecx,Lunroll_inner_end-Lhere-(2*CODE_BYTES_PER_LIMB)
add ecx,edx
; discard the return address pushed by the call
add esp,4
%else
lea ecx,[Lunroll_inner_end-2*CODE_BYTES_PER_LIMB+ecx+edx]
%endif
; edx = -(size-4), the outer loop counter
neg edx
%if OFFSET != 0
sub edi,OFFSET
%endif
; The calculated jump mustn't be before the start of the available
; code. This is the limit that UNROLL_COUNT puts on the src operand
; size,but checked here using the jump address directly.
; ASSERT(ae,movl_text_address( Lunroll_inner_start,%eax) cmpl %eax,%ecx)
; NOTE(review): the `exit` label is not defined anywhere in this file; it
; would have to come from the include file or the link - confirm before
; building with ASSERT defined.
%ifdef ASSERT
mov eax,Lunroll_inner_start
cmp ecx,eax
jae Lunroll_outer_top
jmp exit
%endif
; eax
; ebx high limb to store
; ecx VAR_JMP
; edx VAR_COUNTER,limbs,negative
; esi &src[size],constant
; edi dst ptr,second highest limb of last addmul
; ebp
; cmovX is the conditional move that swaps the two carry registers when the
; chunk about to be entered expects them the other way round. Which
; condition that is depends on the parity of UNROLL_COUNT, because the chunk
; at Lunroll_inner_start sits on an even address and each chunk is 15 bytes.
%if UNROLL_COUNT % 2 == 1
%define cmovX cmovz
%else
%define cmovX cmovnz
%endif
align 16
; One pass of the outer loop: do the first multiply of the row here, then
; jump into the unrolled chunks at an address one chunk earlier than on the
; previous pass.
Lunroll_outer_top:
mov ebp,[-12+OFFSET+esi+edx*4] ; multiplier
mov [VAR_COUNTER],edx
mov eax,[-8+OFFSET+esi+edx*4] ; first limb of multiplicand
mul ebp
; low bit of the previous jump address; the flags survive the mov/lea
; instructions up to the cmovX pair
test cl,1
mov ebx,edx ; high carry
lea edi,[4+edi]
mov edx,ecx ; jump
mov ecx,eax ; low carry
; step the entry point by one chunk
lea edx,[CODE_BYTES_PER_LIMB+edx]
; conditionally exchange ecx and ebx (eax still holds the low product)
cmovX ecx,ebx
cmovX ebx,eax
mov [VAR_JMP],edx
jmp edx
; Must be on an even address here so the low bit of the jump address
; will indicate which way around ecx/ebx should start.
; eax scratch
; ebx carry high
; ecx carry low
; edx scratch
; esi src pointer
; edi dst pointer
; ebp multiplier
;
; 15 code bytes each limb
; ecx/ebx reversed on each chunk
align 2
; Unrolled addmul chunks. The `byte` displacement keyword is what keeps every
; chunk the same length; i counts down, so the last chunk emitted has i = 1.
Lunroll_inner_start:
%assign i UNROLL_COUNT
%rep UNROLL_COUNT
%assign disp_src OFFSET-4*i
%assign disp_dst disp_src
; m4_assert(disp_src>=-128 && disp_src<128)
; m4_assert(disp_dst>=-128 && disp_dst<128)
mov eax,[byte disp_src+esi]
mul ebp
%if i % 2 == 0
; even chunk: ebx is added to dst, ecx absorbs the new low product
add [byte disp_dst+edi],ebx
adc ecx,eax
mov ebx,edx
adc ebx,0
%else
; odd chunk: roles of ecx and ebx exchanged
add [byte disp_dst+edi],ecx
adc ebx,eax
mov ecx,edx
adc ecx,0
%endif
%assign i i-1
%endrep
; Row finished: add the last pending carry into dst, store the high word,
; then reload the counter and jump address for the next outer pass.
Lunroll_inner_end:
add [OFFSET+edi],ebx
mov edx,[VAR_COUNTER]
adc ecx,0
mov [OFFSET+4+edi],ecx
mov ecx,[VAR_JMP]
inc edx
jnz Lunroll_outer_top
; undo the displacement bias applied before the loop
%if OFFSET != 0
add esi,OFFSET
add edi,OFFSET
%endif
; eax
; ebx
; ecx
; edx
; esi &src[size]
; edi &dst[2*size-5]
; ebp
align 16
; The last three cross products, formed from the top three src limbs
; (src[size-3], src[size-2], src[size-1]), added at edi and stored above it.
Lcorner:
mov eax,[-12+esi]
mul dword [-8+esi]
add [edi],eax
mov eax,[-12+esi]
; mov keeps the carry from the add for the following adc
mov ebx,0
adc ebx,edx
mul dword [-4+esi]
add ebx,eax
mov eax,[-8+esi]
adc edx,0
add [4+edi],ebx
mov ebx,0
adc ebx,edx
mul dword [-4+esi]
mov ecx,[PARAM_SIZE]
add eax,ebx
adc edx,0
mov [8+edi],eax
mov [12+edi],edx
; reset edi to the start of dst for the doubling pass
mov edi,[PARAM_DST]
; Left shift of dst[1..2*size-2],the bit shifted out becomes dst[2*size-1].
sub ecx,1 ; size-1
xor eax,eax ; ready for final adcl,and clear carry
mov edx,ecx
mov esi,[PARAM_SRC]
; eax
; ebx
; ecx counter,size-1 to 1
; edx size-1 (for later use)
; esi src (for later use)
; edi dst,incrementing
; ebp
; Doubling pass: rotate two limbs per iteration through the carry flag.
; lea and dec are used for the pointer and counter updates because neither
; modifies the carry flag that links successive rcl instructions.
Llshift:
rcl dword [4+edi],1
rcl dword [8+edi],1
lea edi,[8+edi]
dec ecx
jnz Llshift
; eax was zero, so this captures the final shifted-out bit
adc eax,eax
mov [4+edi],eax ; dst most significant limb
mov eax,[esi] ; src[0]
lea esi,[4+esi+edx*4] ; &src[size]
; ecx is zero after the loop, so this leaves ecx = -(size-1)
sub ecx,edx ; -(size-1)
; Now add in the squares on the diagonal,src[0]^2,src[1]^2,...,
; src[size-1]^2. dst[0] hasn't yet been set at all yet,and just gets the
; low limb of src[0]^2.
; src[0]^2: the low word is stored as dst[0], the high word (edx) is carried
; into the first Ldiag iteration
mul eax
mov [edi+ecx*8],eax ; dst[0]
; eax scratch
; ebx scratch
; ecx counter,negative
; edx carry
; esi &src[size]
; edi dst[2*size-2]
; ebp
; Each iteration squares one src limb and adds the previous high word plus
; the new low word into two consecutive dst limbs; the new high word, with
; any carry, becomes the next iteration's carry.
Ldiag:
mov eax,[esi+ecx*4]
mov ebx,edx
mul eax
add [4+edi+ecx*8],ebx
adc [8+edi+ecx*8],eax
adc edx,0
inc ecx
jnz Ldiag
; restore callee-saved registers and add the last carry into the top limb
mov esi,[SAVE_ESI]
mov ebx,[SAVE_EBX]
add [4+edi],edx ; dst most significant limb
mov edi,[SAVE_EDI]
mov ebp,[SAVE_EBP]
add esp,frame
ret
end

View file

@ -1,520 +0,0 @@
; Intel P6 mpn_sqr_basecase -- square an mpn number.
;
; Copyright 1999,2000,2002 Free Software Foundation,Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License,or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not,write to the Free Software Foundation,Inc.,59 Temple Place -
; Suite 330,Boston,MA 02111-1307,USA.
;
; P6: approx 4.0 cycles per cross product,or 7.75 cycles per triangular
; product (measured on the speed difference between 20 and 40 limbs,
; which is the Karatsuba recursing range).
;
; These are the same as in mpn/x86/k6/sqr_basecase.asm,see that file for
; a description. The only difference here is that UNROLL_COUNT can go up
; to 64 (not 63) making SQR_KARATSUBA_THRESHOLD_MAX 67.
;
; void mpn_sqr_basecase (mp_ptr dst,mp_srcptr src,mp_size_t size);
;
; The algorithm is basically the same as mpn/generic/sqr_basecase.c,but a
; lot of function call overheads are avoided,especially when the given size
; is small.
;
; The code size might look a bit excessive,but not all of it is executed so
; it won't all get into the code cache. The 1x1,2x2 and 3x3 special cases
; clearly apply only to those sizes; mid sizes like 10x10 only need part of
; the unrolled addmul; and big sizes like 40x40 that do use the full
; unrolling will least be making good use of it,because 40x40 will take
; something like 7000 cycles.
%include "..\\x86i.inc"
; Default Karatsuba squaring threshold; a build may replace it by defining
; SQR_KARATSUBA_THRESHOLD_OVERRIDE.
%define SQR_KARATSUBA_THRESHOLD 10
%define SQR_KARATSUBA_THRESHOLD_MAX 67
%ifdef SQR_KARATSUBA_THRESHOLD_OVERRIDE
%define SQR_KARATSUBA_THRESHOLD SQR_KARATSUBA_THRESHOLD_OVERRIDE
%endif
; Number of unrolled addmul chunks. The expression is parenthesised because
; single-line macros expand textually: without the parentheses a later test
; of the form "UNROLL_COUNT modulo 2" would apply the modulo to the 3 alone
; and report the wrong parity, selecting the wrong conditional move for the
; carry-register swap at Lunroll_outer_top.
%define UNROLL_COUNT (SQR_KARATSUBA_THRESHOLD-3)
; Stack arguments (dst, src, size) declared through the frame macros.
FR_def PARAM_SIZE,12
FR_def PARAM_SRC,8
FR_def PARAM_DST,4
; mpn_sqr_basecase entry point: dispatch on size, with the one-limb case
; handled inline.
section .text
global ___gmpn_sqr_basecase
align 32
___gmpn_sqr_basecase:
mov edx,[PARAM_SIZE]
mov eax,[PARAM_SRC]
; the flags from this compare drive both the je and the ja below; the
; intervening mov instructions do not alter them
cmp edx,2
mov ecx,[PARAM_DST]
je Ltwo_limbs
; eax changes from the src pointer to the low src limb here
mov eax,[eax]
ja Lthree_or_more
; one limb only
; eax src limb
; ebx
; ecx dst
; edx
; square the single limb and store the two-limb result
mul eax
mov [ecx],eax
mov [4+ecx],edx
ret
; eax src
; ebx
; ecx dst
; edx
; Save slots for the callee-saved registers, declared through the frame macro
; with negative offsets (below the return address).
FR_def SAVE_ESI, -4
FR_def SAVE_EBX, -8
FR_def SAVE_EDI,-12
FR_def SAVE_EBP,-16
%define STACK_SPACE 16
%define frame 16
; Two-limb square: computes both squares and the single cross product, adds
; the cross product in twice, and writes four result limbs. Register saves
; and restores are interleaved with the arithmetic.
Ltwo_limbs:
sub esp,frame
mov [SAVE_ESI],esi
mov esi,eax
mov eax,[eax]
mul eax ; src[0]^2
mov [ecx],eax ; dst[0]
mov eax,[4+esi]
mov [SAVE_EBX],ebx
mov ebx,edx ; dst[1]
mul eax ; src[1]^2
mov [SAVE_EDI],edi
mov edi,eax ; dst[2]
mov eax,[esi]
mov [SAVE_EBP],ebp
mov ebp,edx ; dst[3]
mul dword [4+esi] ; src[0]*src[1]
; first addition of the cross product (edx:eax) into dst[1..3]
add ebx,eax
mov esi,[SAVE_ESI]
adc edi,edx
adc ebp,0
; second addition of the same cross product; results end up in eax/edx/ebp
add eax,ebx
mov ebx,[SAVE_EBX]
adc edx,edi
mov edi,[SAVE_EDI]
adc ebp,0
mov [4+ecx],eax
mov [12+ecx],ebp
mov ebp,[SAVE_EBP]
mov [8+ecx],edx
add esp,frame
ret
; eax src low limb
; ebx
; ecx dst
; edx size
; frame is reset because this path is reached straight from the entry point,
; before anything has been put on the stack.
%define frame 0
; Sizes of three or more: save esi and branch to the general code for
; size >= 4. The fall-through path is the three-limb square.
Lthree_or_more:
; presumably pushes esi and advances `frame` by 4 - macro comes from the
; include file, TODO confirm
FR_push esi
; the flags from this compare are used by the jae after the load
cmp edx,4
mov esi,[PARAM_SRC]
jae Lfour_or_more
; three limbs
;
; eax src low limb
; ebx
; ecx dst
; edx
; esi src
; edi
; ebp
; The earlier FR_def definitions of these three names are removed so that the
; FR_push invocations below can bind them again.
; NOTE(review): SAVE_ESI is not undefined or rebound here, yet it is read at
; the end of this path - confirm the earlier definition still resolves to
; the slot written by the FR_push of esi.
%undef SAVE_EBP
%undef SAVE_EDI
%undef SAVE_EBX
FR_push ebp,SAVE_EBP
FR_push edi,SAVE_EDI
; store the three squares directly at dst[0..5]
mul eax ; src[0] ^ 2
mov [ecx],eax
mov [4+ecx],edx
mov eax,[4+esi]
xor ebp,ebp
mul eax ; src[1] ^ 2
mov [8+ecx],eax
mov [12+ecx],edx
mov eax,[8+esi]
FR_push ebx,SAVE_EBX
mul eax ; src[2] ^ 2
mov [16+ecx],eax
mov [20+ecx],edx
; accumulate the three cross products in ebx/edi/ebp/edx
mov eax,[esi]
mul dword [4+esi] ; src[0] * src[1]
mov ebx,eax
mov edi,edx
mov eax,[esi]
mul dword [8+esi] ; src[0] * src[2]
add edi,eax
mov ebp,edx
adc ebp,0
mov eax,[4+esi]
mul dword [8+esi] ; src[1] * src[2]
xor esi,esi
add ebp,eax
; eax
; ebx dst[1]
; ecx dst
; edx dst[4]
; esi zero,will be dst[5]
; edi dst[2]
; ebp dst[3]
adc edx,0
; double the cross-product sum; the bit shifted out of the top lands in esi
add ebx,ebx
adc edi,edi
adc ebp,ebp
adc edx,edx
mov eax,[4+ecx]
adc esi,0
; add the doubled cross products to the squares already stored at dst[1..5];
; the carry chain runs through the interleaved mov instructions, which do
; not affect flags
add eax,ebx
mov [4+ecx],eax
mov eax,[8+ecx]
adc eax,edi
mov ebx,[12+ecx]
adc ebx,ebp
mov edi,[16+ecx]
mov [8+ecx],eax
mov ebp,[SAVE_EBP]
mov [12+ecx],ebx
mov ebx,[SAVE_EBX]
adc edi,edx
mov eax,[20+ecx]
mov [16+ecx],edi
mov edi,[SAVE_EDI]
adc eax,esi ; no carry out of this
mov esi,[SAVE_ESI]
mov [20+ecx],eax
add esp,frame
ret
; eax src low limb
; ebx
; ecx
; edx size
; esi src
; edi
; ebp
; Two extra stack slots used by the general case, below the register saves.
%define VAR_COUNTER esp+frame-20
%define VAR_JMP esp+frame-24
%define STACK_SPACE 24
; Only esi has been put on the stack when control arrives here from
; Lthree_or_more, hence a frame of 4.
%define frame 4
; First multiply src[0]*src[1..size-1] and store at dst[1..size].
; The stack is extended to STACK_SPACE bytes and `frame` redefined to match.
; NOTE(review): the SAVE_EDI/SAVE_EBX/SAVE_EBP names used below take whatever
; definition the frame macros last gave them earlier in the file - confirm
; they resolve inside the 24 bytes reserved here.
Lfour_or_more:
sub esp,STACK_SPACE-frame
%define frame STACK_SPACE
mov ecx,1
mov [SAVE_EDI],edi
mov edi,[PARAM_DST]
mov [SAVE_EBX],ebx
sub ecx,edx ; -(size-1)
mov [SAVE_EBP],ebp
mov ebx,0 ; initial carry
lea esi,[esi+edx*4] ; &src[size]
mov ebp,eax ; multiplier
lea edi,[-4+edi+edx*4] ; &dst[size-1]
; This loop runs at just over 6 c/l.
;
; eax scratch
; ebx carry
; ecx counter,limbs,negative,-(size-1) to -1
; edx scratch
; esi &src[size]
; edi &dst[size-1]
; ebp multiplier
; Each iteration multiplies one src limb by the multiplier, adds the incoming
; carry, stores the low word and keeps the high word as the next carry.
Lmul_1:
mov eax,ebp
mul dword [esi+ecx*4]
add eax,ebx
; mov leaves the carry flag from the add intact for the adc
mov ebx,0
adc ebx,edx
mov [4+edi+ecx*4],eax
inc ecx
jnz Lmul_1
; final carry becomes the top limb of this row
mov [4+edi],ebx
; Addmul src[n]*src[n+1..size-1] at dst[2*n-1...],for each n=1..size-2.
;
; The last two addmuls,which are the bottom right corner of the product
; triangle,are left to the end. These are src[size-3]*src[size-2,size-1]
; and src[size-2]*src[size-1]. If size is 4 then it's only these corner
; cases that need to be done.
;
; The unrolled code is the same as mpn_addmul_1(),see that routine for some
; comments.
;
; VAR_COUNTER is the outer loop,running from -(size-4) to -1,inclusive.
;
; VAR_JMP is the computed jump into the unrolled code,stepped by one code
; chunk each outer loop.
;
; This is also hard-coded in the address calculation below.
;
; With &src[size] and &dst[size-1] pointers,the displacements in the
; unrolled code fit in a byte for UNROLL_COUNT values up to 32,but above
; that an offset must be added to them.
;
; eax
; ebx carry
; ecx
; edx
; esi &src[size]
; edi &dst[size-1]
; ebp
; Size in bytes of one unrolled addmul chunk; the entry address arithmetic
; below depends on every chunk being exactly this long.
%define CODE_BYTES_PER_LIMB 15
; OFFSET is a byte bias subtracted from esi and edi so that the per-chunk
; displacements (OFFSET-4*i) still fit in a signed byte when more than 32
; chunks are unrolled. Limbs are 4 bytes wide, so the bias is four times the
; number of chunks beyond 32; a bias counted in limbs would leave the
; displacements outside the byte range.
%if UNROLL_COUNT > 32
%define OFFSET 4*(UNROLL_COUNT-32)
%else
%define OFFSET 0
%endif
; Compute the first entry address into the unrolled chunks (ecx) and the
; outer loop counter (edx).
mov ecx,[PARAM_SIZE]
sub ecx,4
; size 4 needs only the corner products
jz Lcorner
mov edx,ecx
neg ecx
; ecx = -16*(size-4); adding edx = (size-4) below gives -15*(size-4), i.e.
; minus CODE_BYTES_PER_LIMB per limb
shl ecx,4
%if OFFSET != 0
sub esi,OFFSET
%endif
%ifdef PIC
; position-independent form: obtain the address of Lhere from the stack and
; add the link-time distance to the target
call Lhere
Lhere:
add ecx,[esp]
add ecx,Lunroll_inner_end-Lhere-2*CODE_BYTES_PER_LIMB
add ecx,edx
; discard the return address pushed by the call
add esp,4
%else
lea ecx,[Lunroll_inner_end-2*CODE_BYTES_PER_LIMB+ecx+edx]
%endif
; edx = -(size-4), the outer loop counter
neg edx
%if OFFSET != 0
sub edi,OFFSET
%endif
; The calculated jump mustn't be before the start of the available
; code. This is the limit that UNROLL_COUNT puts on the src operand
; size,but checked here using the jump address directly.
; NOTE(review): the `exit` label is not defined anywhere in this file; it
; would have to come from the include file or the link - confirm before
; building with ASSERT defined.
%ifdef ASSERT
mov eax,Lunroll_inner_start
cmp ecx,eax
jae Lunroll_outer_top
jmp exit
%endif
; eax
; ebx high limb to store
; ecx VAR_JMP
; edx VAR_COUNTER,limbs,negative
; esi &src[size],constant
; edi dst ptr,second highest limb of last addmul
; ebp
align 16
; One pass of the outer loop: do the first multiply of the row here, then
; jump into the unrolled chunks at an address one chunk earlier than on the
; previous pass.
Lunroll_outer_top:
mov ebp,[-12+OFFSET+esi+edx*4] ; multiplier
mov [VAR_COUNTER],edx
mov eax,[-8+OFFSET+esi+edx*4] ; first limb of multiplicand
mul ebp
; cmovX is the conditional move that swaps the two carry registers when the
; chunk about to be entered expects them the other way round; the condition
; depends on the parity of UNROLL_COUNT.
; NOTE: UNROLL_COUNT is substituted textually into this test and the modulo
; operator binds tighter than + or -, so the test only sees the intended
; parity when the definition of UNROLL_COUNT is a single value or a
; parenthesised expression.
%if UNROLL_COUNT % 2 == 1
%define cmovX cmovz
%else
%define cmovX cmovnz
%endif
; low bit of the previous jump address; the flags survive the mov/lea
; instructions up to the cmovX pair
test cl,1
mov ebx,edx ; high carry
lea edi,[4+edi]
mov edx,ecx ; jump
mov ecx,eax ; low carry
; step the entry point by one chunk
lea edx,[CODE_BYTES_PER_LIMB+edx]
; conditionally exchange ecx and ebx (eax still holds the low product)
cmovX ecx,ebx
cmovX ebx,eax
mov [VAR_JMP],edx
jmp edx
; Must be on an even address here so the low bit of the jump address
; will indicate which way around ecx/ebx should start.
; eax scratch
; ebx carry high
; ecx carry low
; edx scratch
; esi src pointer
; edi dst pointer
; ebp multiplier
;
; 15 code bytes each limb
; ecx/ebx reversed on each chunk
align 2
; Unrolled addmul chunks. The `byte` displacement keyword is what keeps every
; chunk the same length; i counts down, so the last chunk emitted has i = 1.
Lunroll_inner_start:
%assign i UNROLL_COUNT
%rep UNROLL_COUNT
; the same displacement is used for the src load and the dst update
%assign disp OFFSET-4*i
%if i % 2 == 0
; even chunk: ebx is added to dst, ecx absorbs the new low product
mov eax,[byte disp+esi]
mul ebp
add [byte disp+edi],ebx
adc ecx,eax
mov ebx,edx
adc ebx,0
%else
; this one comes out last
; odd chunk: roles of ecx and ebx exchanged
mov eax,[byte disp+esi]
mul ebp
add [byte disp+edi],ecx
adc ebx,eax
mov ecx,edx
adc ecx,0
%endif
%assign i i-1
%endrep
; Row finished: add the last pending carry into dst, store the high word,
; then reload the counter and jump address for the next outer pass.
Lunroll_inner_end:
add [OFFSET+edi],ebx
mov edx,[VAR_COUNTER]
adc ecx,0
mov [OFFSET+4+edi],ecx
mov ecx,[VAR_JMP]
inc edx
jnz Lunroll_outer_top
; undo the displacement bias applied before the loop
%if OFFSET != 0
add esi,OFFSET
add edi,OFFSET
%endif
; eax
; ebx
; ecx
; edx
; esi &src[size]
; edi &dst[2*size-5]
; ebp
align 16
; The last three cross products, formed from the top three src limbs
; (src[size-3], src[size-2], src[size-1]), added at edi and stored above it.
Lcorner:
mov eax,[-12+esi]
mul dword [-8+esi]
add [edi],eax
mov eax,[-12+esi]
; mov keeps the carry from the add for the following adc
mov ebx,0
adc ebx,edx
mul dword [-4+esi]
add ebx,eax
mov eax,[-8+esi]
adc edx,0
add [4+edi],ebx
mov ebx,0
adc ebx,edx
mul dword [-4+esi]
mov ecx,[PARAM_SIZE]
add eax,ebx
adc edx,0
mov [8+edi],eax
mov [12+edi],edx
; reset edi to the start of dst for the doubling pass
mov edi,[PARAM_DST]
; Left shift of dst[1..2*size-2],the bit shifted out becomes dst[2*size-1].
sub ecx,1 ; size-1
xor eax,eax ; ready for final adcl,and clear carry
mov edx,ecx
mov esi,[PARAM_SRC]
; eax
; ebx
; ecx counter,size-1 to 1
; edx size-1 (for later use)
; esi src (for later use)
; edi dst,incrementing
; ebp
; Doubling pass: rotate two limbs per iteration through the carry flag.
; lea and dec are used for the pointer and counter updates because neither
; modifies the carry flag that links successive rcl instructions.
Llshift:
rcl dword [4+edi],1
rcl dword [8+edi],1
lea edi,[8+edi]
dec ecx
jnz Llshift
; eax was zero, so this captures the final shifted-out bit
adc eax,eax
mov [4+edi],eax ; dst most significant limb
mov eax,[esi] ; src[0]
lea esi,[4+esi+edx*4] ; &src[size]
; ecx is zero after the loop, so this leaves ecx = -(size-1)
sub ecx,edx ; -(size-1)
; Now add in the squares on the diagonal,src[0]^2,src[1]^2,...,
; src[size-1]^2. dst[0] hasn't yet been set at all yet,and just gets the
; low limb of src[0]^2.
; src[0]^2: the low word is stored as dst[0], the high word (edx) is carried
; into the first Ldiag iteration
mul eax
mov [edi+ecx*8],eax ; dst[0]
; eax scratch
; ebx scratch
; ecx counter,negative
; edx carry
; esi &src[size]
; edi dst[2*size-2]
; ebp
; Each iteration squares one src limb and adds the previous high word plus
; the new low word into two consecutive dst limbs; the new high word, with
; any carry, becomes the next iteration's carry.
Ldiag:
mov eax,[esi+ecx*4]
mov ebx,edx
mul eax
add [4+edi+ecx*8],ebx
adc [8+edi+ecx*8],eax
adc edx,0
inc ecx
jnz Ldiag
; restore callee-saved registers and add the last carry into the top limb
mov esi,[SAVE_ESI]
mov ebx,[SAVE_EBX]
add [4+edi],edx ; dst most significant limb
mov edi,[SAVE_EDI]
mov ebp,[SAVE_EBP]
add esp,frame
ret
end

View file

@ -1,48 +0,0 @@
; Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
; mpn_copyd: copy `size` limbs from src to dst, starting at the highest
; index and working down to index 0.
;
; Stack arguments: [esp+4] dst, [esp+8] src, [esp+12] size.
; ebx is used as the transfer register; its original value is parked in the
; size argument slot (already read by then) and restored before returning.
section .text
global ___gmpn_copyd
%ifdef DLL
export ___gmpn_copyd
%endif
align 8
___gmpn_copyd:
mov edx,[4+esp] ; dst
mov eax,[8+esp] ; src
mov ecx,[12+esp] ; size
mov [12+esp],ebx ; park ebx in the now-unused size slot
add ecx,-1 ; index of the highest limb
js Lcopyd_done ; negative index: nothing to copy
Lcopyd_loop:
mov ebx,[eax+ecx*4]
mov [edx+ecx*4],ebx
add ecx,-1
jns Lcopyd_loop ; keep going while the index is still >= 0
Lcopyd_done:
mov ebx,[12+esp] ; recover the caller's ebx
ret
end

View file

@ -1,63 +0,0 @@
; Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\x86i.inc"
; mpn_copyi: copy `size` limbs from src to dst in ascending address order.
;
; Stack arguments: [esp+4] dst, [esp+8] src, [esp+12] size.
; Sizes up to 150 use a simple load/store loop with ebx as the transfer
; register (its value is parked in the size argument slot). Larger sizes use
; rep movsd, with esi and edi preserved in eax and edx around the copy.
section .text
global ___gmpn_copyi
%ifdef DLL
export ___gmpn_copyi
%endif
align 8
___gmpn_copyi:
mov ecx, [12+esp] ; size
cmp ecx, 150
jg Lcopyi_movs ; large copy: use the string instruction
mov edx, [4+esp] ; dst
mov eax, [8+esp] ; src
mov [12+esp],ebx ; park ebx in the now-unused size slot
test ecx,ecx
jz Lcopyi_done ; zero limbs: nothing to copy
Lcopyi_loop:
mov ebx, [eax]
mov [edx],ebx
lea eax, [4+eax] ; lea leaves the flags alone
lea edx, [4+edx]
add ecx, -1
jnz Lcopyi_loop
Lcopyi_done:
mov ebx, [12+esp] ; recover the caller's ebx
ret
Lcopyi_movs:
mov eax,esi ; keep the caller's esi and edi in scratch registers
mov edx,edi
mov esi, [8+esp] ; src
mov edi, [4+esp] ; dst
cld ; ascending direction for movsd
rep movsd ; ecx still holds the limb count
mov edi,edx
mov esi,eax
ret
end

View file

@ -1,353 +0,0 @@
; Copyright 2001 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\..\x86i.inc"
; Stack arguments (dst, src, size, shift), relative to esp plus the frame.
%define PARAM_SHIFT esp+frame+16
%define PARAM_SIZE esp+frame+12
%define PARAM_SRC esp+frame+8
%define PARAM_DST esp+frame+4
; frame is 8 because the function pushes ebx and edi before reading any
; argument.
%define frame 8
; minimum 5,because the unrolled loop can't handle less
%define UNROLL_THRESHOLD 5
section .text
global ___gmpn_lshift
%ifdef DLL
export ___gmpn_lshift
%endif
align 8
; mpn_lshift entry: load the arguments, send sizes at or above
; UNROLL_THRESHOLD to the unrolled MMX code, sizes 2..4 to the simple MMX
; loop, and handle size 1 inline with integer instructions.
___gmpn_lshift:
push ebx
push edi
mov eax,[PARAM_SIZE]
mov edx,[PARAM_DST]
mov ebx,[PARAM_SRC]
mov ecx,[PARAM_SHIFT]
cmp eax,UNROLL_THRESHOLD
jae Lunroll
mov edi,[-4+ebx+eax*4] ; src high limb
dec eax
jnz Lsimple
; size 1: eax is zero here, so shld leaves the bits shifted out of the limb
; in eax as the return value
shld eax,edi,cl
shl edi,cl
mov [edx],edi ; dst low limb
pop edi ; risk of data cache bank clash
pop ebx
ret
; eax size-1
; ebx src
; ecx shift
; edx dst
; esi
; edi
; ebp
; Simple loop for small sizes. Each step loads a qword straddling two
; adjacent src limbs, shifts it right by 32-shift and stores the low dword.
Lsimple:
movd mm5,[ebx+eax*4] ; src high limb
movd mm6,ecx ; lshift
neg ecx
psllq mm5,mm6
add ecx,32
; mm7 = 32-shift
movd mm7,ecx
psrlq mm5,32 ; retval
; eax counter,limbs,negative
; ebx src
; ecx
; edx dst
; esi
; edi
;
; mm0 scratch
; mm5 return value
; mm6 shift
; mm7 32-shift
Lsimple_top:
movq mm0,[ebx+eax*4-4]
; the flags from this dec are consumed by the jnz below; the MMX
; instructions in between do not touch them
dec eax
psrlq mm0,mm7
movd [4+edx+eax*4],mm0
jnz Lsimple_top
; lowest limb: a plain left shift, nothing below it to merge in
movd mm0,[ebx]
movd eax,mm5
psllq mm0,mm6
pop edi
pop ebx
movd [edx],mm0
emms
ret
; eax size
; ebx src
; ecx shift
; edx dst
; esi
; edi
; ebp
align 8
; Unrolled path. Start forming the return value from the high limb and test
; whether the end of src is 8-byte aligned.
Lunroll:
movd mm5,[ebx+eax*4-4] ; src high limb
lea edi,[ebx+eax*4]
movd mm6,ecx ; lshift
; the zero flag set here is tested by the jz; psllq does not alter flags
and edi,4
psllq mm5,mm6
jz Lstart_src_aligned
; src isn't aligned,process high limb separately (marked xxx) to
; make it so.
;
; source -8(ebx,%eax,4)
; |
; +-------+-------+-------+--
; | |
; +-------+-------+-------+--
; 0mod8 4mod8 0mod8
;
; dest
; -4(edx,%eax,4)
; |
; +-------+-------+--
; | xxx | |
; +-------+-------+--
; src end not 8-byte aligned: produce the top dst limb on its own and drop
; the limb count by one so the remaining src ends on a qword boundary
movq mm0,[ebx+eax*4-8] ; unaligned load
psllq mm0,mm6
dec eax
psrlq mm0,32
movd [edx+eax*4],mm0
Lstart_src_aligned:
movq mm1,[ebx+eax*4-8] ; src high qword
; same alignment test, now on the end of dst; the flags survive the two
; MMX instructions before the jz
lea edi,[edx+eax*4]
and edi,4
psrlq mm5,32 ; return value
movq mm3,[ebx+eax*4-16] ; src second highest qword
jz Lstart_dst_aligned
; dst isn't aligned,subtract 4 to make it so,and pretend the shift
; is 32 bits extra. High limb of dst (marked xxx) handled here
; separately.
;
; source -8(ebx,%eax,4)
; |
; +-------+-------+--
; | mm1 |
; +-------+-------+--
; 0mod8 4mod8
;
; dest
; -4(edx,%eax,4)
; |
; +-------+-------+-------+--
; | xxx | |
; +-------+-------+-------+--
; 0mod8 4mod8 0mod8
; dst end not 8-byte aligned: store the top dst limb separately, then treat
; the shift as 32 bits larger with dst moved down by one limb
movq mm0,mm1
add ecx,32 ; new shift
psllq mm0,mm6
movd mm6,ecx
psrlq mm0,32
; wasted cycle here waiting for %mm0
movd [-4+edx+eax*4],mm0
sub edx,4
Lstart_dst_aligned:
psllq mm1,mm6
neg ecx ; -shift
add ecx,64 ; 64-shift
movq mm2,mm3
movd mm7,ecx
; the carry from this subtraction is tested by the jc below; the MMX
; instructions in between leave flags alone
sub eax,8 ; size-8
psrlq mm3,mm7
por mm3,mm1 ; mm3 ready to store
jc Lfinish
; The comments in mpn_rshift apply here too.
; eax counter,limbs
; ebx src
; ecx
; edx dst
; esi
; edi
;
; mm0
; mm1
; mm2 src qword from 16(%ebx,%eax,4)
; mm3 dst qword ready to store to 24(%edx,%eax,4)
;
; mm5 return value
; mm6 lshift
; mm7 rshift
align 8
; Main loop: four limbs (two qwords) per iteration. Each stored qword is the
; previous src qword shifted left, merged with the next lower src qword
; shifted right.
Lunroll_loop:
movq mm0,[ebx+eax*4+8]
psllq mm2,mm6
movq mm1,mm0
psrlq mm0,mm7
movq [24+edx+eax*4],mm3
por mm0,mm2
movq mm3,[ebx+eax*4]
psllq mm1,mm6
movq [16+edx+eax*4],mm0
movq mm2,mm3
psrlq mm3,mm7
; loop until the count goes below zero; por does not disturb the carry
sub eax,4
por mm3,mm1
jnc Lunroll_loop
; Tail handling after the main loop. Bit 1 of the remaining count selects
; one more qword step; bit 0 then decides between the two final cases.
Lfinish:
; eax -4 to -1 representing respectively 0 to 3 limbs remaining
test al,2
jz Lfinish_no_two
movq mm0,[ebx+eax*4+8]
psllq mm2,mm6
movq mm1,mm0
psrlq mm0,mm7
movq [24+edx+eax*4],mm3 ; prev
por mm0,mm2
movq mm2,mm1
movq mm3,mm0
sub eax,2
Lfinish_no_two:
; eax -4 or -3 representing respectively 0 or 1 limbs remaining
; mm2 src prev qword,from 16(%ebx,%eax,4)
; mm3 dst qword,for 24(%edx,%eax,4)
; the flags from this test are still valid at the jz: movd and pop do not
; modify them
test al,1
movd eax,mm5 ; retval
pop edi
jz Lfinish_zero
; One extra src limb,destination was aligned.
;
; source ebx
; --+---------------+-------+
; | mm2 | |
; --+---------------+-------+
;
; dest edx+12 edx+4 edx
; --+---------------+---------------+-------+
; | mm3 | | |
; --+---------------+---------------+-------+
;
; mm6 = shift
; mm7 = ecx = 64-shift
; One extra src limb,destination was unaligned.
;
; source ebx
; --+---------------+-------+
; | mm2 | |
; --+---------------+-------+
;
; dest edx+12 edx+4
; --+---------------+---------------+
; | mm3 | |
; --+---------------+---------------+
;
; mm6 = shift+32
; mm7 = ecx = 64-(shift+32)
; In both cases there's one extra limb of src to fetch and combine
; with mm2 to make a qword at 4(%edx),and in the aligned case
; there's an extra limb of dst to be formed from that extra src limb
; left shifted.
; one remaining src limb: place it in the high half of a qword so it can be
; merged with mm2 exactly like a full qword step
movd mm0,[ebx]
psllq mm2,mm6
movq [12+edx],mm3
psllq mm0,32
movq mm1,mm0
psrlq mm0,mm7
por mm0,mm2
psllq mm1,mm6
movq [4+edx],mm0
psrlq mm1,32
; ecx is 64 minus the effective shift; bit 5 tells the aligned and unaligned
; dst cases apart. pop does not change the flags tested by the jz.
and ecx,32
pop ebx
jz Lfinish_one_unaligned
; aligned dst only: one more low limb to store
movd [edx],mm1
Lfinish_one_unaligned:
emms
ret
Lfinish_zero:
; No extra src limbs,destination was aligned.
;
; source ebx
; --+---------------+
; | mm2 |
; --+---------------+
;
; dest edx+8 edx
; --+---------------+---------------+
; | mm3 | |
; --+---------------+---------------+
;
; mm6 = shift
; mm7 = ecx = 64-shift
; No extra src limbs,destination was unaligned.
;
; source ebx
; --+---------------+
; | mm2 |
; --+---------------+
;
; dest edx+8 edx+4
; --+---------------+-------+
; | mm3 | |
; --+---------------+-------+
;
; mm6 = shift+32
; mm7 = ecx = 64-(shift+32)
; The movd for the unaligned case writes the same data to 4(%edx)
; that the movq does for the aligned case.
; no remaining src limbs: store the pending qword, then the shifted low qword
movq [8+edx],mm3
; bit 5 of ecx tells the aligned and unaligned dst cases apart; psllq does
; not change the flags tested by the jz
and ecx,32
psllq mm2,mm6
jz Lfinish_zero_unaligned
; aligned dst only: store the full low qword
movq [edx],mm2
Lfinish_zero_unaligned:
psrlq mm2,32
pop ebx
; eax already received this value at Lfinish_no_two; reloading it is harmless
movd eax,mm5 ; retval
movd [4+edx],mm2
emms
ret
end

View file

@ -1,126 +0,0 @@
; Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
; ph_fun <mode> -- shared function body for the bit-counting entry points.
;
;   mode 0: count the set bits in the limbs at PARAM_SRC.
;   mode 1: XOR every limb at PARAM_SRC with the matching limb at PARAM_SRC2
;           and count the set bits of the result.
;
; PARAM_SIZE, PARAM_SRC (and PARAM_SRC2 for mode 1) must be %defined as
; offsets from esp before the macro is expanded.  Limbs are addressed as
; 4-byte units.  The body ends with its own emms/ret; the count is returned
; in eax (low dword of the running total held in mm0).
;
; Registers written: eax, ecx, edx (mode 1 or PIC), mm0-mm2, mm4-mm7, and
; mm3 in mode 1.
;
; NOTE(review): a size of 0 is not handled -- the initial "sub ecx,1" then
; borrows, "ja" is not taken, and the single-limb path reads [eax-4].
; Presumably callers guarantee size >= 1; confirm against the C prototype.
%macro ph_fun 1
; Load the three bit-slicing masks into mm7/mm6/mm5.  The PIC variant builds
; them from immediates (edx is scratch here) so it makes no data references;
; the non-PIC variant reads the 8-byte constants L_AA/L_33/L_0F.
%ifdef PIC
mov edx,0xAAAAAAAA
movd mm7,edx
punpckldq mm7,mm7
mov edx,0x33333333
movd mm6,edx
punpckldq mm6,mm6
mov edx,0x0F0F0F0F
movd mm5,edx
punpckldq mm5,mm5
%else
movq mm7,[L_AA]
movq mm6,[L_33]
movq mm5,[L_0F]
%endif
; Fetch the arguments: ecx = limb count, eax = src, edx = src2 (mode 1 only).
mov ecx,[esp+PARAM_SIZE]
mov eax,[esp+PARAM_SRC]
%if %1 == 1
mov edx,[esp+PARAM_SRC2]
%endif
; mm4 = 0 (reference operand for psadbw), mm0 = 0 (running total).
pxor mm4,mm4
pxor mm0,mm0
; ecx = size-1.  More than one limb (unsigned size-1 > 0): enter the
; two-limbs-per-pass loop.  Exactly one limb: fall through with ecx = 0.
sub ecx,1
ja %%L_top
; Single remaining limb.  Reached with ecx = 0, either by the fall-through
; above or from the "jz" at the bottom of the loop, so the effective address
; is simply the current eax (and edx).
%%L_last:
movd mm1,[eax+ecx*4]
%if %1 == 1
movd mm2,[edx+ecx*4]
pxor mm1,mm2
%endif
jmp %%L_loaded
; Two limbs per pass: load them as two dwords, pack them into one qword in
; mm1 and step the source pointer(s) forward by 8 bytes.
%%L_top:
movd mm1,[eax]
movd mm2,[eax+4]
punpckldq mm1,mm2
add eax,8
%if %1 == 1
movd mm2,[edx]
movd mm3,[edx+4]
punpckldq mm2,mm3
pxor mm1,mm2
add edx,8
%endif
; mm1 holds the data to count (one or two limbs).
%%L_loaded:
; Step 1: x - ((x & 0xAA..) >> 1) leaves a bit count in every 2-bit field.
movq mm2,mm7
pand mm2,mm1
psrlq mm2,1
psubd mm1,mm2
; Step 2: add neighbouring 2-bit counts into 4-bit fields (mask 0x33..).
movq mm2,mm6
pand mm2,mm1
psrlq mm1,2
pand mm1,mm6
paddd mm1,mm2
; Step 3: add neighbouring 4-bit counts into bytes (mask 0x0F..).
movq mm2,mm5
pand mm2,mm1
psrlq mm1,4
pand mm1,mm5
paddd mm1,mm2
; Sum the eight byte counts (absolute differences against zero in mm4) and
; accumulate into the running total.
psadbw mm1,mm4
paddd mm0,mm1
; Loop control, driven by the flags of this sub:
;   result > 0  -> at least two limbs left, run another pair pass
;   result == 0 -> exactly one limb left, handle it via the single-limb path
;   result < 0  -> nothing left, fall through and return
sub ecx,2
jg %%L_top
jz %%L_last
; Return the low dword of the total and leave MMX state before returning.
movd eax,mm0
emms
ret
%endmacro
%ifndef PIC
; Bit-slicing masks read by ph_fun in the non-PIC build (the PIC build
; synthesises the same values in registers instead).  Each 8-byte mask is
; spelled as two identical dwords, which lays down the same bytes in memory
; as a single 64-bit constant.
data
align 8
L_AA: dd 0xAAAAAAAA,0xAAAAAAAA
L_33: dd 0x33333333,0x33333333
L_0F: dd 0x0F0F0F0F,0x0F0F0F0F
%endif
; ___gmpn_popcount: population count entry point, implemented by expanding
; ph_fun in mode 0.  The argument offsets below are relative to esp on entry
; and are listed in ascending stack order.
text
%define PARAM_SRC 4
%define PARAM_SIZE 8
global ___gmpn_popcount
%ifdef DLL
export ___gmpn_popcount
%endif
align 16
___gmpn_popcount:
ph_fun 0
; ___gmpn_hamdist: Hamming distance entry point, implemented by expanding
; ph_fun in mode 1 (the two sources are XORed before counting).  The argument
; offsets are redefined for the three-argument layout, again relative to esp
; on entry and listed in ascending stack order.
%define PARAM_SRC 4
%define PARAM_SRC2 8
%define PARAM_SIZE 12
global ___gmpn_hamdist
%ifdef DLL
export ___gmpn_hamdist
%endif
align 16
___gmpn_hamdist:
ph_fun 1
end

Some files were not shown because too many files have changed in this diff Show more