mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2026-01-02 07:30:55 -08:00
Add more files from GMP 4.1.2
This commit is contained in:
parent
4b11900381
commit
1d2c4cd6f0
17 changed files with 1611 additions and 0 deletions
98
src/gmp/mpn/pa32/hppa1_1/addmul_1.asm
Normal file
98
src/gmp/mpn/pa32/hppa1_1/addmul_1.asm
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
dnl HP-PA 1.1 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
|
||||
dnl result to a second limb vector.
|
||||
|
||||
dnl Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
|
||||
dnl Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C res_ptr r26
|
||||
C s1_ptr r25
|
||||
C size r24
|
||||
C s2_limb r23
|
||||
|
||||
C This runs at 11 cycles/limb on a PA7000. With the used instructions, it can
|
||||
C not become faster due to data cache contention after a store. On the PA7100
|
||||
C it runs at 10 cycles/limb.
|
||||
|
||||
C There are some ideas described in mul_1.asm that apply to this code too.
|
||||
|
||||
ASM_START()
|
||||
C mpn_addmul_1: multiply the limb vector at s1_ptr (size limbs) by s2_limb and
C add the products into the limb vector at res_ptr. The last carry limb is
C left in %r28. A 64-byte stack frame is used as scratch space to move values
C between the floating-point and the general registers.
PROLOGUE(mpn_addmul_1)
|
||||
C .callinfo frame=64,no_calls
|
||||
|
||||
ldo 64(%r30),%r30 C allocate the 64-byte scratch frame
|
||||
fldws,ma 4(%r25),%fr5 C fr5 = first s1 limb, s1_ptr += 4
|
||||
stw %r23,-16(%r30) C move s2_limb ...
|
||||
addib,= -1,%r24,L(just_one_limb) C size == 1 is handled separately
|
||||
fldws -16(%r30),%fr4 C ... into fr4
|
||||
add %r0,%r0,%r0 C clear carry
|
||||
xmpyu %fr4,%fr5,%fr6 C fr6 = 64-bit product of the first limb
|
||||
fldws,ma 4(%r25),%fr7 C fr7 = second s1 limb
|
||||
fstds %fr6,-16(%r30) C spill the product so integer loads can read it
|
||||
xmpyu %fr4,%fr7,%fr8 C fr8 = 64-bit product of the second limb
|
||||
ldw -12(%r30),%r19 C least significant limb in product
|
||||
ldw -16(%r30),%r28 C most significant limb in product
|
||||
|
||||
|
||||
fstds %fr8,-16(%r30) C spill the second product over the first
|
||||
addib,= -1,%r24,L(end) C exactly two limbs: skip the loop
|
||||
ldw -12(%r30),%r1 C low limb of the second product (branch delay slot)
|
||||
|
||||
|
||||
C Main loop
|
||||
.label L(loop)
|
||||
|
||||
ldws 0(%r26),%r29 C r29 = current result limb
|
||||
fldws,ma 4(%r25),%fr5 C fetch the next s1 limb
|
||||
add %r29,%r19,%r19 C add the pending low limb, sets cy
|
||||
stws,ma %r19,4(%r26) C store it and advance res_ptr
|
||||
addc %r28,%r1,%r19 C pending = high limb + next low limb + cy
|
||||
xmpyu %fr4,%fr5,%fr6
|
||||
ldw -16(%r30),%r28 C high limb of the product spilled earlier
|
||||
fstds %fr6,-16(%r30) C spill the newest product
|
||||
addc %r0,%r28,%r28 C fold cy into the high limb
|
||||
addib,<> -1,%r24,L(loop)
|
||||
ldw -12(%r30),%r1 C low limb of the newest product (branch delay slot)
|
||||
|
||||
|
||||
.label L(end)
|
||||
|
||||
ldw 0(%r26),%r29
|
||||
add %r29,%r19,%r19
|
||||
stws,ma %r19,4(%r26) C store the second-to-last result limb
|
||||
addc %r28,%r1,%r19
|
||||
ldw -16(%r30),%r28 C high limb of the last product
|
||||
ldws 0(%r26),%r29
|
||||
addc %r0,%r28,%r28
|
||||
add %r29,%r19,%r19
|
||||
stws,ma %r19,4(%r26) C store the last result limb
|
||||
addc %r0,%r28,%r28 C r28 = final carry limb
|
||||
bv 0(%r2) C return through %r2
|
||||
ldo -64(%r30),%r30 C release the frame (branch delay slot)
|
||||
|
||||
|
||||
.label L(just_one_limb)
|
||||
|
||||
xmpyu %fr4,%fr5,%fr6
|
||||
ldw 0(%r26),%r29
|
||||
fstds %fr6,-16(%r30)
|
||||
ldw -12(%r30),%r1 C low limb of the product
|
||||
ldw -16(%r30),%r28 C high limb of the product
|
||||
add %r29,%r1,%r19
|
||||
stw %r19,0(%r26)
|
||||
addc %r0,%r28,%r28 C r28 = high limb + cy
|
||||
bv 0(%r2)
|
||||
ldo -64(%r30),%r30 C release the frame (branch delay slot)
|
||||
EPILOGUE()
|
||||
63
src/gmp/mpn/pa32/hppa1_1/gmp-mparam.h
Normal file
63
src/gmp/mpn/pa32/hppa1_1/gmp-mparam.h
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
/* HP-PA 1.1 gmp-mparam.h -- Compiler/machine parameter header file.
|
||||
|
||||
Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
MA 02111-1307, USA. */
|
||||
|
||||
/* Limb geometry for this target: one limb is 32 bits, i.e. 4 bytes. */
#define BITS_PER_MP_LIMB 32
|
||||
#define BYTES_PER_MP_LIMB 4
|
||||
|
||||
/* Generated by tuneup.c, 2002-03-07, gcc 2.8 (pa7100/100MHz) */
|
||||
|
||||
/* NOTE(review): the *_THRESHOLD values below are presumably operand sizes
   (in limbs) at which the library switches algorithm; the consumers are not
   visible in this file -- confirm against the code that uses them.  */
#define MUL_KARATSUBA_THRESHOLD 30
|
||||
#define MUL_TOOM3_THRESHOLD 141
|
||||
|
||||
#define SQR_BASECASE_THRESHOLD 4
|
||||
#define SQR_KARATSUBA_THRESHOLD 55
|
||||
#define SQR_TOOM3_THRESHOLD 185
|
||||
|
||||
#define DIV_SB_PREINV_THRESHOLD 0 /* always */
|
||||
#define DIV_DC_THRESHOLD 95
|
||||
#define POWM_THRESHOLD 150
|
||||
|
||||
#define GCD_ACCEL_THRESHOLD 3
|
||||
#define GCDEXT_THRESHOLD 0 /* always */
|
||||
#define JACOBI_BASE_METHOD 2
|
||||
|
||||
#define DIVREM_1_NORM_THRESHOLD 3
|
||||
#define DIVREM_1_UNNORM_THRESHOLD 6
|
||||
#define MOD_1_NORM_THRESHOLD 3
|
||||
#define MOD_1_UNNORM_THRESHOLD 6
|
||||
#define USE_PREINV_DIVREM_1 1
|
||||
#define USE_PREINV_MOD_1 1
|
||||
#define DIVREM_2_THRESHOLD 0 /* always */
|
||||
#define DIVEXACT_1_THRESHOLD 0 /* always */
|
||||
#define MODEXACT_1_ODD_THRESHOLD 0 /* always */
|
||||
|
||||
#define GET_STR_DC_THRESHOLD 13
|
||||
#define GET_STR_PRECOMPUTE_THRESHOLD 23
|
||||
#define SET_STR_THRESHOLD 6589
|
||||
|
||||
/* Both FFT tables below end with a 0 entry. */
#define MUL_FFT_TABLE { 592, 1440, 2688, 5632, 14336, 40960, 0 }
|
||||
#define MUL_FFT_MODF_THRESHOLD 608
|
||||
#define MUL_FFT_THRESHOLD 5888
|
||||
|
||||
#define SQR_FFT_TABLE { 624, 1504, 2688, 6656, 18432, 40960, 0 }
|
||||
#define SQR_FFT_MODF_THRESHOLD 640
|
||||
#define SQR_FFT_THRESHOLD 5376
|
||||
94
src/gmp/mpn/pa32/hppa1_1/mul_1.asm
Normal file
94
src/gmp/mpn/pa32/hppa1_1/mul_1.asm
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
dnl HP-PA 1.1 mpn_mul_1 -- Multiply a limb vector with a limb and store the
|
||||
dnl result in a second limb vector.
|
||||
|
||||
dnl Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
|
||||
dnl Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C res_ptr r26
|
||||
C s1_ptr r25
|
||||
C size r24
|
||||
C s2_limb r23
|
||||
|
||||
C This runs at 9 cycles/limb on a PA7000. With the used instructions, it can
|
||||
C not become faster due to data cache contention after a store. On the PA7100
|
||||
C it runs at 7 cycles/limb.
|
||||
|
||||
C We could use fldds to read two limbs at a time from the S1 array, and that
|
||||
C could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
|
||||
C PA7100, respectively. We don't do that since it does not seem worth the
|
||||
C (alignment) troubles...
|
||||
|
||||
C At least the PA7100 is rumored to be able to deal with cache-misses without
|
||||
C stalling instruction issue. If this is true, and the cache is actually also
|
||||
C lockup-free, we should use a deeper software pipeline, and load from S1 very
|
||||
C early! (The loads and stores to -12(sp) will surely be in the cache.)
|
||||
|
||||
ASM_START()
|
||||
C mpn_mul_1: multiply the limb vector at s1_ptr (size limbs) by s2_limb and
C store the products at res_ptr. The last carry limb is left in %r28. A
C 64-byte stack frame is used as scratch space to move values between the
C floating-point and the general registers.
PROLOGUE(mpn_mul_1)
|
||||
C .callinfo frame=64,no_calls
|
||||
|
||||
ldo 64(%r30),%r30 C allocate the 64-byte scratch frame
|
||||
fldws,ma 4(%r25),%fr5 C fr5 = first s1 limb, s1_ptr += 4
|
||||
stw %r23,-16(%r30) C move s2_limb ...
|
||||
addib,= -1,%r24,L(just_one_limb) C size == 1 is handled separately
|
||||
fldws -16(%r30),%fr4 C ... into fr4
|
||||
add %r0,%r0,%r0 C clear carry
|
||||
xmpyu %fr4,%fr5,%fr6 C fr6 = 64-bit product of the first limb
|
||||
fldws,ma 4(%r25),%fr7 C fr7 = second s1 limb
|
||||
fstds %fr6,-16(%r30) C spill the product so integer loads can read it
|
||||
xmpyu %fr4,%fr7,%fr8 C fr8 = 64-bit product of the second limb
|
||||
ldw -12(%r30),%r19 C least significant limb in product
|
||||
ldw -16(%r30),%r28 C most significant limb in product
|
||||
|
||||
|
||||
fstds %fr8,-16(%r30) C spill the second product over the first
|
||||
addib,= -1,%r24,L(end) C exactly two limbs: skip the loop
|
||||
ldw -12(%r30),%r1 C low limb of the second product (branch delay slot)
|
||||
|
||||
|
||||
C Main loop
|
||||
.label L(loop)
|
||||
|
||||
fldws,ma 4(%r25),%fr5 C fetch the next s1 limb
|
||||
stws,ma %r19,4(%r26) C store the pending limb and advance res_ptr
|
||||
addc %r28,%r1,%r19 C pending = high limb + next low limb + cy
|
||||
xmpyu %fr4,%fr5,%fr6
|
||||
ldw -16(%r30),%r28 C high limb of the product spilled earlier
|
||||
fstds %fr6,-16(%r30) C spill the newest product
|
||||
addib,<> -1,%r24,L(loop)
|
||||
ldw -12(%r30),%r1 C low limb of the newest product (branch delay slot)
|
||||
|
||||
|
||||
.label L(end)
|
||||
|
||||
stws,ma %r19,4(%r26) C store the second-to-last result limb
|
||||
addc %r28,%r1,%r19
|
||||
ldw -16(%r30),%r28 C high limb of the last product
|
||||
stws,ma %r19,4(%r26) C store the last result limb
|
||||
addc %r0,%r28,%r28 C r28 = final carry limb
|
||||
bv 0(%r2) C return through %r2
|
||||
ldo -64(%r30),%r30 C release the frame (branch delay slot)
|
||||
|
||||
|
||||
.label L(just_one_limb)
|
||||
|
||||
xmpyu %fr4,%fr5,%fr6
|
||||
fstds %fr6,-16(%r30)
|
||||
ldw -16(%r30),%r28 C r28 = high limb of the product
|
||||
ldo -64(%r30),%r30 C release the frame
|
||||
bv 0(%r2)
|
||||
fstws %fr6R,0(%r26) C store the right half of fr6 at res_ptr (branch delay slot)
|
||||
EPILOGUE()
|
||||
74
src/gmp/mpn/pa32/hppa1_1/pa7100/add_n.asm
Normal file
74
src/gmp/mpn/pa32/hppa1_1/pa7100/add_n.asm
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
dnl HP-PA mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||
dnl sum in a third limb vector. Optimized for the PA7100, where it runs at
|
||||
dnl 4.25 cycles/limb.
|
||||
|
||||
dnl Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C res_ptr r26
|
||||
C s1_ptr r25
|
||||
C s2_ptr r24
|
||||
C size r23
|
||||
|
||||
ASM_START()
|
||||
C mpn_add_n: add the limb vectors at s1_ptr and s2_ptr (size limbs each, size
C greater than 0) and store the sum at res_ptr. The carry-out is left in %r28.
C The sum of each limb pair is kept in %r28 and stored one step later, so the
C carry flag links consecutive addc instructions.
PROLOGUE(mpn_add_n)
|
||||
ldws,ma 4(0,%r25),%r20 C first s1 limb, s1_ptr += 4
|
||||
ldws,ma 4(0,%r24),%r19 C first s2 limb, s2_ptr += 4
|
||||
|
||||
addib,<= -5,%r23,L(rest) C five limbs or fewer: skip the unrolled loop
|
||||
add %r20,%r19,%r28 C add first limbs ignoring cy
|
||||
|
||||
C Four limbs per iteration
.label L(loop)
|
||||
ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
stws,ma %r28,4(0,%r26) C store the previous sum
|
||||
addc %r20,%r19,%r28
|
||||
ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
stws,ma %r28,4(0,%r26)
|
||||
addc %r20,%r19,%r28
|
||||
ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
stws,ma %r28,4(0,%r26)
|
||||
addc %r20,%r19,%r28
|
||||
ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
stws,ma %r28,4(0,%r26)
|
||||
addib,> -4,%r23,L(loop)
|
||||
addc %r20,%r19,%r28 C branch delay slot
|
||||
|
||||
.label L(rest)
|
||||
addib,= 4,%r23,L(end) C r23 = limbs still to add; none left -> L(end)
|
||||
nop
|
||||
|
||||
C One limb per iteration
.label L(eloop)
|
||||
ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
stws,ma %r28,4(0,%r26)
|
||||
addib,> -1,%r23,L(eloop)
|
||||
addc %r20,%r19,%r28 C branch delay slot
|
||||
|
||||
.label L(end)
|
||||
stws %r28,0(0,%r26) C store the last sum limb
|
||||
bv 0(%r2) C return through %r2
|
||||
addc %r0,%r0,%r28 C r28 = carry-out (branch delay slot)
|
||||
EPILOGUE()
|
||||
192
src/gmp/mpn/pa32/hppa1_1/pa7100/addmul_1.asm
Normal file
192
src/gmp/mpn/pa32/hppa1_1/pa7100/addmul_1.asm
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
dnl HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and
|
||||
dnl add the result to a second limb vector.
|
||||
|
||||
dnl Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C Register aliases used by the code below.
C The first four are the incoming arguments.
define(`res_ptr',`%r26')
|
||||
define(`s1_ptr',`%r25')
|
||||
define(`size_param',`%r24')
|
||||
define(`s2_limb',`%r23')
|
||||
|
||||
C cylimb carries the high limb from one product to the next and is the value
C left in place on return. s0..s3 hold limbs read from res_ptr. lo0..lo3 and
C hi0..hi3 hold the low and high words of four products. %r3..%r7 are saved on
C the stack before use and restored afterwards by the code below.
define(`cylimb',`%r28')
|
||||
define(`s0',`%r19')
|
||||
define(`s1',`%r20')
|
||||
define(`s2',`%r3')
|
||||
define(`s3',`%r4')
|
||||
define(`lo0',`%r21')
|
||||
define(`lo1',`%r5')
|
||||
define(`lo2',`%r6')
|
||||
define(`lo3',`%r7')
|
||||
define(`hi0',`%r22')
|
||||
define(`hi1',`%r23') C safe to reuse
|
||||
define(`hi2',`%r29')
|
||||
define(`hi3',`%r1')
|
||||
|
||||
ASM_START()
|
||||
C mpn_addmul_1: multiply the limb vector at s1_ptr (size_param limbs) by
C s2_limb and add the products into the limb vector at res_ptr. The last carry
C limb is left in cylimb. Vectors of four or more limbs go through a 4-way
C software pipeline that reads s1 two limbs at a time; leftover limbs and short
C vectors are handled one at a time in L(loop2).
PROLOGUE(mpn_addmul_1)
|
||||
C .callinfo frame=128,no_calls
|
||||
|
||||
ldo 128(%r30),%r30 C allocate the 128-byte frame
|
||||
stws s2_limb,-16(%r30) C move s2_limb through memory ...
|
||||
add %r0,%r0,cylimb C clear cy and cylimb
|
||||
addib,< -4,size_param,L(few_limbs) C fewer than four limbs: simple loop only
|
||||
fldws -16(%r30),%fr31R C ... into fr31R (branch delay slot)
|
||||
|
||||
ldo -112(%r30),%r31 C r31 = base for the product scratch slots
|
||||
stw %r3,-96(%r30) C save the registers aliased as s2, s3, lo1..lo3
|
||||
stw %r4,-92(%r30)
|
||||
stw %r5,-88(%r30)
|
||||
stw %r6,-84(%r30)
|
||||
stw %r7,-80(%r30)
|
||||
|
||||
C Skip the one-limb prelude when bit 29 of s1_ptr is clear (presumably this
C means s1_ptr is already 8-byte aligned for the fldds loads -- confirm).
bb,>=,n s1_ptr,29,L(0)
|
||||
|
||||
fldws,ma 4(s1_ptr),%fr4
|
||||
ldws 0(res_ptr),s0
|
||||
xmpyu %fr4,%fr31R,%fr5
|
||||
fstds %fr5,-16(%r31)
|
||||
ldws -16(%r31),cylimb C high word becomes the running carry limb
|
||||
ldws -12(%r31),lo0
|
||||
add s0,lo0,s0 C cy stays live for the next addc
|
||||
addib,< -1,size_param,L(few_limbs)
|
||||
stws,ma s0,4(res_ptr) C branch delay slot
|
||||
|
||||
C start software pipeline ----------------------------------------------------
|
||||
.label L(0)
|
||||
fldds,ma 8(s1_ptr),%fr4 C two s1 limbs per load
|
||||
fldds,ma 8(s1_ptr),%fr8
|
||||
|
||||
xmpyu %fr4L,%fr31R,%fr5
|
||||
xmpyu %fr4R,%fr31R,%fr6
|
||||
xmpyu %fr8L,%fr31R,%fr9
|
||||
xmpyu %fr8R,%fr31R,%fr10
|
||||
|
||||
fstds %fr5,-16(%r31) C spill the four products for the integer loads
|
||||
fstds %fr6,-8(%r31)
|
||||
fstds %fr9,0(%r31)
|
||||
fstds %fr10,8(%r31)
|
||||
|
||||
ldws -16(%r31),hi0
|
||||
ldws -12(%r31),lo0
|
||||
ldws -8(%r31),hi1
|
||||
ldws -4(%r31),lo1
|
||||
ldws 0(%r31),hi2
|
||||
ldws 4(%r31),lo2
|
||||
ldws 8(%r31),hi3
|
||||
ldws 12(%r31),lo3
|
||||
|
||||
addc lo0,cylimb,lo0 C add each high word into the next low word
|
||||
addc lo1,hi0,lo1
|
||||
addc lo2,hi1,lo2
|
||||
addc lo3,hi2,lo3
|
||||
|
||||
addib,< -4,size_param,L(end)
|
||||
addc %r0,hi3,cylimb C propagate carry into cylimb
|
||||
C main loop ------------------------------------------------------------------
|
||||
.label L(loop)
|
||||
fldds,ma 8(s1_ptr),%fr4
|
||||
fldds,ma 8(s1_ptr),%fr8
|
||||
|
||||
ldws 0(res_ptr),s0
|
||||
xmpyu %fr4L,%fr31R,%fr5
|
||||
ldws 4(res_ptr),s1
|
||||
xmpyu %fr4R,%fr31R,%fr6
|
||||
ldws 8(res_ptr),s2
|
||||
xmpyu %fr8L,%fr31R,%fr9
|
||||
ldws 12(res_ptr),s3
|
||||
xmpyu %fr8R,%fr31R,%fr10
|
||||
|
||||
fstds %fr5,-16(%r31)
|
||||
add s0,lo0,s0 C add the previous group of four into the result limbs
|
||||
fstds %fr6,-8(%r31)
|
||||
addc s1,lo1,s1
|
||||
fstds %fr9,0(%r31)
|
||||
addc s2,lo2,s2
|
||||
fstds %fr10,8(%r31)
|
||||
addc s3,lo3,s3
|
||||
|
||||
ldws -16(%r31),hi0
|
||||
ldws -12(%r31),lo0
|
||||
ldws -8(%r31),hi1
|
||||
ldws -4(%r31),lo1
|
||||
ldws 0(%r31),hi2
|
||||
ldws 4(%r31),lo2
|
||||
ldws 8(%r31),hi3
|
||||
ldws 12(%r31),lo3
|
||||
|
||||
addc lo0,cylimb,lo0 C picks up the cy left by the addc chain above
|
||||
stws,ma s0,4(res_ptr)
|
||||
addc lo1,hi0,lo1
|
||||
stws,ma s1,4(res_ptr)
|
||||
addc lo2,hi1,lo2
|
||||
stws,ma s2,4(res_ptr)
|
||||
addc lo3,hi2,lo3
|
||||
stws,ma s3,4(res_ptr)
|
||||
|
||||
addib,>= -4,size_param,L(loop)
|
||||
addc %r0,hi3,cylimb C propagate carry into cylimb
|
||||
C finish software pipeline ---------------------------------------------------
|
||||
.label L(end)
|
||||
ldws 0(res_ptr),s0
|
||||
ldws 4(res_ptr),s1
|
||||
ldws 8(res_ptr),s2
|
||||
ldws 12(res_ptr),s3
|
||||
|
||||
add s0,lo0,s0
|
||||
stws,ma s0,4(res_ptr)
|
||||
addc s1,lo1,s1
|
||||
stws,ma s1,4(res_ptr)
|
||||
addc s2,lo2,s2
|
||||
stws,ma s2,4(res_ptr)
|
||||
addc s3,lo3,s3
|
||||
stws,ma s3,4(res_ptr)
|
||||
|
||||
C restore callee-saved registers ---------------------------------------------
|
||||
ldw -96(%r30),%r3
|
||||
ldw -92(%r30),%r4
|
||||
ldw -88(%r30),%r5
|
||||
ldw -84(%r30),%r6
|
||||
ldw -80(%r30),%r7
|
||||
|
||||
C Execution falls through to here after the pipeline; the cy flag from the
C last addc above is still live and is consumed below.
.label L(few_limbs)
|
||||
addib,=,n 4,size_param,L(ret) C undo the -4 bias; no limbs left -> L(ret)
|
||||
|
||||
C One limb per iteration
.label L(loop2)
|
||||
fldws,ma 4(s1_ptr),%fr4
|
||||
ldws 0(res_ptr),s0
|
||||
xmpyu %fr4,%fr31R,%fr5
|
||||
fstds %fr5,-16(%r30)
|
||||
ldws -16(%r30),hi0
|
||||
ldws -12(%r30),lo0
|
||||
addc lo0,cylimb,lo0 C low word + carry limb + cy
|
||||
addc %r0,hi0,cylimb C new carry limb = high word + cy
|
||||
add s0,lo0,s0 C cy from this add is consumed by the next addc
|
||||
stws,ma s0,4(res_ptr)
|
||||
addib,<> -1,size_param,L(loop2)
|
||||
nop
|
||||
|
||||
.label L(ret)
|
||||
addc %r0,cylimb,cylimb C fold the last cy into the carry limb
|
||||
bv 0(%r2) C return through %r2
|
||||
ldo -128(%r30),%r30 C release the frame (branch delay slot)
|
||||
EPILOGUE(mpn_addmul_1)
|
||||
86
src/gmp/mpn/pa32/hppa1_1/pa7100/lshift.asm
Normal file
86
src/gmp/mpn/pa32/hppa1_1/pa7100/lshift.asm
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
dnl HP-PA mpn_lshift -- Shift a number left.
|
||||
dnl Optimized for the PA7100, where it runs at 3.25 cycles/limb.
|
||||
|
||||
dnl Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C res_ptr r26
|
||||
C s_ptr r25
|
||||
C size r24
|
||||
C cnt r23
|
||||
|
||||
ASM_START()
|
||||
C mpn_lshift: shift the limb vector at s_ptr (size limbs) left by cnt bits and
C store the result at res_ptr. The bits shifted out of the top limb are left
C in %r28. Both pointers are first moved past the end of their vectors, and
C the limbs are then processed from the highest address downwards.
PROLOGUE(mpn_lshift)
|
||||
sh2add %r24,%r25,%r25 C s_ptr += 4*size
|
||||
sh2add %r24,%r26,%r26 C res_ptr += 4*size
|
||||
ldws,mb -4(0,%r25),%r22 C r22 = top limb (pre-decrement load)
|
||||
subi 32,%r23,%r1
|
||||
mtsar %r1 C shift amount register = 32 - cnt
|
||||
addib,= -1,%r24,L(0004) C size == 1: only the final store is needed
|
||||
vshd %r0,%r22,%r28 C compute carry out limb
|
||||
ldws,mb -4(0,%r25),%r29
|
||||
addib,<= -5,%r24,L(rest) C six limbs or fewer: skip the unrolled loop
|
||||
vshd %r22,%r29,%r20 C first result limb (branch delay slot)
|
||||
|
||||
C Four limbs per iteration; r22 and r29 alternate as the newest source limb
.label L(loop)
|
||||
ldws,mb -4(0,%r25),%r22
|
||||
stws,mb %r20,-4(0,%r26)
|
||||
vshd %r29,%r22,%r20
|
||||
ldws,mb -4(0,%r25),%r29
|
||||
stws,mb %r20,-4(0,%r26)
|
||||
vshd %r22,%r29,%r20
|
||||
ldws,mb -4(0,%r25),%r22
|
||||
stws,mb %r20,-4(0,%r26)
|
||||
vshd %r29,%r22,%r20
|
||||
ldws,mb -4(0,%r25),%r29
|
||||
stws,mb %r20,-4(0,%r26)
|
||||
addib,> -4,%r24,L(loop)
|
||||
vshd %r22,%r29,%r20 C branch delay slot
|
||||
|
||||
.label L(rest)
|
||||
addib,= 4,%r24,L(end1) C r24 = limbs not yet loaded; none -> L(end1)
|
||||
nop
|
||||
|
||||
C Up to two limbs per iteration
.label L(eloop)
|
||||
ldws,mb -4(0,%r25),%r22
|
||||
stws,mb %r20,-4(0,%r26)
|
||||
addib,<= -1,%r24,L(end2) C last source limb is in r22
|
||||
vshd %r29,%r22,%r20
|
||||
ldws,mb -4(0,%r25),%r29
|
||||
stws,mb %r20,-4(0,%r26)
|
||||
addib,> -1,%r24,L(eloop)
|
||||
vshd %r22,%r29,%r20
|
||||
|
||||
C Last source limb is in r29
.label L(end1)
|
||||
stws,mb %r20,-4(0,%r26)
|
||||
vshd %r29,%r0,%r20 C lowest result limb
|
||||
bv 0(%r2)
|
||||
stw %r20,-4(0,%r26) C branch delay slot
|
||||
|
||||
.label L(end2)
|
||||
stws,mb %r20,-4(0,%r26)
|
||||
|
||||
C Last source limb is in r22
.label L(0004)
|
||||
vshd %r22,%r0,%r20 C lowest result limb
|
||||
bv 0(%r2)
|
||||
stw %r20,-4(0,%r26) C branch delay slot
|
||||
EPILOGUE()
|
||||
83
src/gmp/mpn/pa32/hppa1_1/pa7100/rshift.asm
Normal file
83
src/gmp/mpn/pa32/hppa1_1/pa7100/rshift.asm
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
dnl HP-PA mpn_rshift -- Shift a number right.
|
||||
dnl Optimized for the PA7100, where it runs at 3.25 cycles/limb.
|
||||
|
||||
dnl Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C res_ptr r26
|
||||
C s_ptr r25
|
||||
C size r24
|
||||
C cnt r23
|
||||
|
||||
ASM_START()
|
||||
C mpn_rshift: shift the limb vector at s_ptr (size limbs) right by cnt bits
C and store the result at res_ptr. The bits shifted out of the lowest limb are
C left in %r28. Limbs are processed from the lowest address upwards.
PROLOGUE(mpn_rshift)
|
||||
ldws,ma 4(0,%r25),%r22 C r22 = lowest limb, s_ptr += 4
|
||||
mtsar %r23 C shift amount register = cnt
|
||||
addib,= -1,%r24,L(0004) C size == 1: only the final store is needed
|
||||
vshd %r22,%r0,%r28 C compute carry out limb
|
||||
ldws,ma 4(0,%r25),%r29
|
||||
addib,<= -5,%r24,L(rest) C six limbs or fewer: skip the unrolled loop
|
||||
vshd %r29,%r22,%r20 C first result limb (branch delay slot)
|
||||
|
||||
C Four limbs per iteration; r22 and r29 alternate as the newest source limb
.label L(loop)
|
||||
ldws,ma 4(0,%r25),%r22
|
||||
stws,ma %r20,4(0,%r26)
|
||||
vshd %r22,%r29,%r20
|
||||
ldws,ma 4(0,%r25),%r29
|
||||
stws,ma %r20,4(0,%r26)
|
||||
vshd %r29,%r22,%r20
|
||||
ldws,ma 4(0,%r25),%r22
|
||||
stws,ma %r20,4(0,%r26)
|
||||
vshd %r22,%r29,%r20
|
||||
ldws,ma 4(0,%r25),%r29
|
||||
stws,ma %r20,4(0,%r26)
|
||||
addib,> -4,%r24,L(loop)
|
||||
vshd %r29,%r22,%r20 C branch delay slot
|
||||
|
||||
.label L(rest)
|
||||
addib,= 4,%r24,L(end1) C r24 = limbs not yet loaded; none -> L(end1)
|
||||
nop
|
||||
|
||||
C Up to two limbs per iteration
.label L(eloop)
|
||||
ldws,ma 4(0,%r25),%r22
|
||||
stws,ma %r20,4(0,%r26)
|
||||
addib,<= -1,%r24,L(end2) C last source limb is in r22
|
||||
vshd %r22,%r29,%r20
|
||||
ldws,ma 4(0,%r25),%r29
|
||||
stws,ma %r20,4(0,%r26)
|
||||
addib,> -1,%r24,L(eloop)
|
||||
vshd %r29,%r22,%r20
|
||||
|
||||
C Last source limb is in r29
.label L(end1)
|
||||
stws,ma %r20,4(0,%r26)
|
||||
vshd %r0,%r29,%r20 C highest result limb
|
||||
bv 0(%r2)
|
||||
stw %r20,0(0,%r26) C branch delay slot
|
||||
|
||||
.label L(end2)
|
||||
stws,ma %r20,4(0,%r26)
|
||||
|
||||
C Last source limb is in r22
.label L(0004)
|
||||
vshd %r0,%r22,%r20 C highest result limb
|
||||
bv 0(%r2)
|
||||
stw %r20,0(0,%r26) C branch delay slot
|
||||
EPILOGUE()
|
||||
75
src/gmp/mpn/pa32/hppa1_1/pa7100/sub_n.asm
Normal file
75
src/gmp/mpn/pa32/hppa1_1/pa7100/sub_n.asm
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
dnl HP-PA mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
dnl store difference in a third limb vector. Optimized for the PA7100, where
|
||||
dnl it runs at 4.25 cycles/limb.
|
||||
|
||||
dnl Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C res_ptr r26
|
||||
C s1_ptr r25
|
||||
C s2_ptr r24
|
||||
C size r23
|
||||
|
||||
ASM_START()
|
||||
C mpn_sub_n: subtract the limb vector at s2_ptr from the one at s1_ptr (size
C limbs each, size greater than 0) and store the difference at res_ptr. On
C return %r28 holds 1 minus the final carry bit. The difference of each limb
C pair is kept in %r28 and stored one step later, so the carry flag links
C consecutive subb instructions.
PROLOGUE(mpn_sub_n)
|
||||
ldws,ma 4(0,%r25),%r20 C first s1 limb, s1_ptr += 4
|
||||
ldws,ma 4(0,%r24),%r19 C first s2 limb, s2_ptr += 4
|
||||
|
||||
addib,<= -5,%r23,L(rest) C five limbs or fewer: skip the unrolled loop
|
||||
sub %r20,%r19,%r28 C subtract first limbs ignoring cy
|
||||
|
||||
C Four limbs per iteration
.label L(loop)
|
||||
ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
stws,ma %r28,4(0,%r26) C store the previous difference
|
||||
subb %r20,%r19,%r28
|
||||
ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
stws,ma %r28,4(0,%r26)
|
||||
subb %r20,%r19,%r28
|
||||
ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
stws,ma %r28,4(0,%r26)
|
||||
subb %r20,%r19,%r28
|
||||
ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
stws,ma %r28,4(0,%r26)
|
||||
addib,> -4,%r23,L(loop)
|
||||
subb %r20,%r19,%r28 C branch delay slot
|
||||
|
||||
.label L(rest)
|
||||
addib,= 4,%r23,L(end) C r23 = limbs still to subtract; none left -> L(end)
|
||||
nop
|
||||
|
||||
C One limb per iteration
.label L(eloop)
|
||||
ldws,ma 4(0,%r25),%r20
|
||||
ldws,ma 4(0,%r24),%r19
|
||||
stws,ma %r28,4(0,%r26)
|
||||
addib,> -1,%r23,L(eloop)
|
||||
subb %r20,%r19,%r28 C branch delay slot
|
||||
|
||||
.label L(end)
|
||||
stws %r28,0(0,%r26) C store the last difference limb
|
||||
addc %r0,%r0,%r28 C r28 = carry bit
|
||||
bv 0(%r2) C return through %r2
|
||||
subi 1,%r28,%r28 C r28 = 1 - carry bit (branch delay slot)
|
||||
EPILOGUE()
|
||||
198
src/gmp/mpn/pa32/hppa1_1/pa7100/submul_1.asm
Normal file
198
src/gmp/mpn/pa32/hppa1_1/pa7100/submul_1.asm
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
dnl HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
|
||||
dnl subtract the result from a second limb vector.
|
||||
|
||||
dnl Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C Register aliases used by the code below.
C The first four are the incoming arguments.
define(`res_ptr',`%r26')
|
||||
define(`s1_ptr',`%r25')
|
||||
define(`size_param',`%r24')
|
||||
define(`s2_limb',`%r23')
|
||||
|
||||
C cylimb carries the high limb from one product to the next and is the value
C left in place on return. s0..s3 hold limbs read from res_ptr. lo0..lo3 and
C hi0..hi3 hold the low and high words of four products. %r3..%r7 are saved on
C the stack before use and restored afterwards by the code below.
define(`cylimb',`%r28')
|
||||
define(`s0',`%r19')
|
||||
define(`s1',`%r20')
|
||||
define(`s2',`%r3')
|
||||
define(`s3',`%r4')
|
||||
define(`lo0',`%r21')
|
||||
define(`lo1',`%r5')
|
||||
define(`lo2',`%r6')
|
||||
define(`lo3',`%r7')
|
||||
define(`hi0',`%r22')
|
||||
define(`hi1',`%r23') C safe to reuse
|
||||
define(`hi2',`%r29')
|
||||
define(`hi3',`%r1')
|
||||
|
||||
ASM_START()
|
||||
C mpn_submul_1: multiply the limb vector at s1_ptr (size_param limbs) by
C s2_limb and subtract the products from the limb vector at res_ptr. The last
C carry limb is left in cylimb. Vectors of four or more limbs go through a
C 4-way software pipeline that reads s1 two limbs at a time; leftover limbs and
C short vectors are handled one at a time in L(loop2). After each subtraction
C the cy flag is inverted so that the following addc chain can consume it.
PROLOGUE(mpn_submul_1)
|
||||
C .callinfo frame=128,no_calls
|
||||
|
||||
ldo 128(%r30),%r30 C allocate the 128-byte frame
|
||||
stws s2_limb,-16(%r30) C move s2_limb through memory ...
|
||||
add %r0,%r0,cylimb C clear cy and cylimb
|
||||
addib,< -4,size_param,L(few_limbs) C fewer than four limbs: simple loop only
|
||||
fldws -16(%r30),%fr31R C ... into fr31R (branch delay slot)
|
||||
|
||||
ldo -112(%r30),%r31 C r31 = base for the product scratch slots
|
||||
stw %r3,-96(%r30) C save the registers aliased as s2, s3, lo1..lo3
|
||||
stw %r4,-92(%r30)
|
||||
stw %r5,-88(%r30)
|
||||
stw %r6,-84(%r30)
|
||||
stw %r7,-80(%r30)
|
||||
|
||||
C Skip the one-limb prelude when bit 29 of s1_ptr is clear (presumably this
C means s1_ptr is already 8-byte aligned for the fldds loads -- confirm).
bb,>=,n s1_ptr,29,L(0)
|
||||
|
||||
fldws,ma 4(s1_ptr),%fr4
|
||||
ldws 0(res_ptr),s0
|
||||
xmpyu %fr4,%fr31R,%fr5
|
||||
fstds %fr5,-16(%r31)
|
||||
ldws -16(%r31),cylimb C high word becomes the running carry limb
|
||||
ldws -12(%r31),lo0
|
||||
sub s0,lo0,s0
|
||||
add s0,lo0,%r0 C invert cy
|
||||
addib,< -1,size_param,L(few_limbs)
|
||||
stws,ma s0,4(res_ptr) C branch delay slot
|
||||
|
||||
C start software pipeline ----------------------------------------------------
|
||||
.label L(0)
|
||||
fldds,ma 8(s1_ptr),%fr4 C two s1 limbs per load
|
||||
fldds,ma 8(s1_ptr),%fr8
|
||||
|
||||
xmpyu %fr4L,%fr31R,%fr5
|
||||
xmpyu %fr4R,%fr31R,%fr6
|
||||
xmpyu %fr8L,%fr31R,%fr9
|
||||
xmpyu %fr8R,%fr31R,%fr10
|
||||
|
||||
fstds %fr5,-16(%r31) C spill the four products for the integer loads
|
||||
fstds %fr6,-8(%r31)
|
||||
fstds %fr9,0(%r31)
|
||||
fstds %fr10,8(%r31)
|
||||
|
||||
ldws -16(%r31),hi0
|
||||
ldws -12(%r31),lo0
|
||||
ldws -8(%r31),hi1
|
||||
ldws -4(%r31),lo1
|
||||
ldws 0(%r31),hi2
|
||||
ldws 4(%r31),lo2
|
||||
ldws 8(%r31),hi3
|
||||
ldws 12(%r31),lo3
|
||||
|
||||
addc lo0,cylimb,lo0 C add each high word into the next low word
|
||||
addc lo1,hi0,lo1
|
||||
addc lo2,hi1,lo2
|
||||
addc lo3,hi2,lo3
|
||||
|
||||
addib,< -4,size_param,L(end)
|
||||
addc %r0,hi3,cylimb C propagate carry into cylimb
|
||||
C main loop ------------------------------------------------------------------
|
||||
.label L(loop)
|
||||
fldds,ma 8(s1_ptr),%fr4
|
||||
fldds,ma 8(s1_ptr),%fr8
|
||||
|
||||
ldws 0(res_ptr),s0
|
||||
xmpyu %fr4L,%fr31R,%fr5
|
||||
ldws 4(res_ptr),s1
|
||||
xmpyu %fr4R,%fr31R,%fr6
|
||||
ldws 8(res_ptr),s2
|
||||
xmpyu %fr8L,%fr31R,%fr9
|
||||
ldws 12(res_ptr),s3
|
||||
xmpyu %fr8R,%fr31R,%fr10
|
||||
|
||||
fstds %fr5,-16(%r31)
|
||||
sub s0,lo0,s0 C subtract the previous group of four from the result limbs
|
||||
fstds %fr6,-8(%r31)
|
||||
subb s1,lo1,s1
|
||||
fstds %fr9,0(%r31)
|
||||
subb s2,lo2,s2
|
||||
fstds %fr10,8(%r31)
|
||||
subb s3,lo3,s3
|
||||
subb %r0,%r0,lo0 C these two insns ...
|
||||
add lo0,lo0,%r0 C ... just invert cy
|
||||
|
||||
ldws -16(%r31),hi0
|
||||
ldws -12(%r31),lo0
|
||||
ldws -8(%r31),hi1
|
||||
ldws -4(%r31),lo1
|
||||
ldws 0(%r31),hi2
|
||||
ldws 4(%r31),lo2
|
||||
ldws 8(%r31),hi3
|
||||
ldws 12(%r31),lo3
|
||||
|
||||
addc lo0,cylimb,lo0 C picks up the inverted cy from above
|
||||
stws,ma s0,4(res_ptr)
|
||||
addc lo1,hi0,lo1
|
||||
stws,ma s1,4(res_ptr)
|
||||
addc lo2,hi1,lo2
|
||||
stws,ma s2,4(res_ptr)
|
||||
addc lo3,hi2,lo3
|
||||
stws,ma s3,4(res_ptr)
|
||||
|
||||
addib,>= -4,size_param,L(loop)
|
||||
addc %r0,hi3,cylimb C propagate carry into cylimb
|
||||
C finish software pipeline ---------------------------------------------------
|
||||
.label L(end)
|
||||
ldws 0(res_ptr),s0
|
||||
ldws 4(res_ptr),s1
|
||||
ldws 8(res_ptr),s2
|
||||
ldws 12(res_ptr),s3
|
||||
|
||||
sub s0,lo0,s0
|
||||
stws,ma s0,4(res_ptr)
|
||||
subb s1,lo1,s1
|
||||
stws,ma s1,4(res_ptr)
|
||||
subb s2,lo2,s2
|
||||
stws,ma s2,4(res_ptr)
|
||||
subb s3,lo3,s3
|
||||
stws,ma s3,4(res_ptr)
|
||||
subb %r0,%r0,lo0 C these two insns ...
|
||||
add lo0,lo0,%r0 C ... invert cy
|
||||
|
||||
C restore callee-saved registers ---------------------------------------------
|
||||
ldw -96(%r30),%r3
|
||||
ldw -92(%r30),%r4
|
||||
ldw -88(%r30),%r5
|
||||
ldw -84(%r30),%r6
|
||||
ldw -80(%r30),%r7
|
||||
|
||||
C Execution falls through to here after the pipeline; the inverted cy flag
C from above is still live and is consumed below.
.label L(few_limbs)
|
||||
addib,=,n 4,size_param,L(ret) C undo the -4 bias; no limbs left -> L(ret)
|
||||
|
||||
C One limb per iteration
.label L(loop2)
|
||||
fldws,ma 4(s1_ptr),%fr4
|
||||
ldws 0(res_ptr),s0
|
||||
xmpyu %fr4,%fr31R,%fr5
|
||||
fstds %fr5,-16(%r30)
|
||||
ldws -16(%r30),hi0
|
||||
ldws -12(%r30),lo0
|
||||
addc lo0,cylimb,lo0 C low word + carry limb + cy
|
||||
addc %r0,hi0,cylimb C new carry limb = high word + cy
|
||||
sub s0,lo0,s0
|
||||
add s0,lo0,%r0 C invert cy
|
||||
stws,ma s0,4(res_ptr)
|
||||
addib,<> -1,size_param,L(loop2)
|
||||
nop
|
||||
|
||||
.label L(ret)
|
||||
addc %r0,cylimb,cylimb C fold the last cy into the carry limb
|
||||
bv 0(%r2) C return through %r2
|
||||
ldo -128(%r30),%r30 C release the frame (branch delay slot)
|
||||
EPILOGUE(mpn_submul_1)
|
||||
51
src/gmp/mpn/pa32/hppa1_1/sqr_diagonal.asm
Normal file
51
src/gmp/mpn/pa32/hppa1_1/sqr_diagonal.asm
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
dnl HP-PA 1.1 32-bit mpn_sqr_diagonal.
|
||||
|
||||
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C This code runs at 6 cycles/limb on the PA7100 and 2.5 cycles/limb on PA8x00.
|
||||
C 2-way unrolling wouldn't help the PA7100; it could however bring times down
|
||||
C to 2.0 cycles/limb for the PA8x00.
|
||||
|
||||
C INPUT PARAMETERS
define(`rp',`%r26')
define(`up',`%r25')
define(`n',`%r24')

C mpn_sqr_diagonal (rp, up, n)
C
C For each of the n limbs read through up, form the 64-bit square with xmpyu
C and store it as two consecutive words through rp, so 2*n words are written.
C The right half of the product register goes to the lower address and the
C left half to the higher address.
C
C The loop is software pipelined by one stage: each pass stores the product
C formed on the previous pass, and the branch delay slot starts the next
C multiply.  rp is biased by 4 on entry so the two stores can use the
C displacements -4 and 0, the second one post-incrementing rp by 8.
C
C NOTE(review): the code appears to assume n >= 1.  With n = 0 the first
C addib would not see a zero count.  Confirm against the callers.

ASM_START()
PROLOGUE(mpn_sqr_diagonal)
	ldo		4(rp),rp		C bias rp by one word
	fldws,ma	4(up),%fr4r		C load first limb, advance up
	addib,=		-1,n,L(exit)		C single limb: skip the loop
	 xmpyu		%fr4r,%fr4r,%fr5	C (delay slot) square first limb

	.label	L(loop)
	fldws,ma	4(up),%fr4r		C fetch next limb
	fstws		%fr5r,-4(rp)		C store pending product, first word
	fstws,ma	%fr5l,8(rp)		C second word at 0(rp), then rp += 8
	addib,<>	-1,n,L(loop)
	 xmpyu		%fr4r,%fr4r,%fr5	C (delay slot) square limb just loaded

	.label	L(exit)
	fstws		%fr5r,-4(rp)		C drain the last product
	bv		0(%r2)			C return
	 fstws		%fr5l,0(rp)		C (delay slot) final store
EPILOGUE(mpn_sqr_diagonal)
|
||||
107
src/gmp/mpn/pa32/hppa1_1/submul_1.asm
Normal file
107
src/gmp/mpn/pa32/hppa1_1/submul_1.asm
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
dnl HP-PA 1.1 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
|
||||
dnl the result from a second limb vector.
|
||||
|
||||
dnl Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
|
||||
dnl Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C res_ptr r26
|
||||
C s1_ptr r25
|
||||
C size r24
|
||||
C s2_limb r23
|
||||
|
||||
C This runs at 12 cycles/limb on a PA7000. With the used instructions, it can
|
||||
C not become faster due to data cache contention after a store. On the PA7100
|
||||
C it runs at 11 cycles/limb.
|
||||
|
||||
C There are some ideas described in mul_1.asm that apply to this code too.
|
||||
|
||||
C It seems possible to make this run as fast as mpn_addmul_1, if we use
|
||||
C sub,>>= %r29,%r19,%r22
|
||||
C addi 1,%r28,%r28
|
||||
C but that requires reworking the hairy software pipeline...
|
||||
|
||||
C mpn_submul_1 -- multiply the limbs at s1_ptr by s2_limb and subtract the
C products from the limbs at res_ptr, storing the differences in place.
C
C Register use:
C   %r26  res_ptr, post-incremented by each stws,ma
C   %r25  s1_ptr, post-incremented by each fldws,ma
C   %r24  size, counted down by the addib instructions
C   %r23  s2_limb, moved through the stack into %fr4
C   %r19  product limb that is subtracted next
C   %r1   low word of the product most recently spilled to the stack
C   %r28  high word of a product plus pending carry.  It holds the final
C         carry-out limb at return (presumably the function result).
C   %r29  limb loaded through res_ptr
C   %r22  difference written back through res_ptr
C   The doubleword at -16(%r30) is scratch for moving values between the
C   floating-point and the general registers.
C
C Carry handling: each "sub" is followed by "add diff,subtrahend,%r0".  That
C add produces a carry exactly when the sub borrowed, so the borrow can be
C folded into the next product limb with addc.  Loads, stores, xmpyu and
C addib sit between the carry producers and consumers, so the code relies
C on them leaving the carry bit alone.

ASM_START()
PROLOGUE(mpn_submul_1)
C	.callinfo	frame=64,no_calls

	ldo		64(%r30),%r30		C allocate a 64 byte frame
	fldws,ma	4(%r25),%fr5		C first s1 limb, advance s1_ptr
	stw		%r23,-16(%r30)		C move s2_limb ...
	addib,=		-1,%r24,L(just_one_limb)
	 fldws		-16(%r30),%fr4		C ... into fr4 (delay slot)
	add		%r0,%r0,%r0		C clear carry
	xmpyu		%fr4,%fr5,%fr6		C product of limb 0
	fldws,ma	4(%r25),%fr7		C second s1 limb
	fstds		%fr6,-16(%r30)		C spill product 0 to the scratch slot
	xmpyu		%fr4,%fr7,%fr8		C product of limb 1
	ldw		-12(%r30),%r19		C least significant limb in product
	ldw		-16(%r30),%r28		C most significant limb of product 0

	fstds		%fr8,-16(%r30)		C spill product 1
	addib,=		-1,%r24,L(end)		C exactly two limbs: skip the loop
	 ldw		-12(%r30),%r1		C (delay slot) low word of product 1

C Main loop
	.label	L(loop)
	ldws		0(%r26),%r29		C result limb
	fldws,ma	4(%r25),%fr5		C next s1 limb
	sub		%r29,%r19,%r22		C subtract pending product limb
	add		%r22,%r19,%r0		C carry set iff the sub borrowed
	stws,ma		%r22,4(%r26)		C store difference, advance res_ptr
	addc		%r28,%r1,%r19		C next limb = high + low + borrow
	xmpyu		%fr4,%fr5,%fr6		C start the next product
	ldw		-16(%r30),%r28		C high word of the spilled product
	fstds		%fr6,-16(%r30)		C spill the new product over it
	addc		%r0,%r28,%r28		C fold carry of the addc into high word
	addib,<>	-1,%r24,L(loop)
	 ldw		-12(%r30),%r1		C (delay slot) low word of new product

C Drain the pipeline: two product limbs are still pending here.
	.label	L(end)
	ldw		0(%r26),%r29
	sub		%r29,%r19,%r22
	add		%r22,%r19,%r0		C carry set iff the sub borrowed
	stws,ma		%r22,4(%r26)
	addc		%r28,%r1,%r19
	ldw		-16(%r30),%r28		C high word of the last product
	ldws		0(%r26),%r29
	addc		%r0,%r28,%r28
	sub		%r29,%r19,%r22
	add		%r22,%r19,%r0		C carry set iff the sub borrowed
	stws,ma		%r22,4(%r26)
	addc		%r0,%r28,%r28		C final carry-out limb
	bv		0(%r2)			C return
	 ldo		-64(%r30),%r30		C (delay slot) release the frame

C size == 1: a single multiply and subtract, no pipelining.
	.label	L(just_one_limb)
	xmpyu		%fr4,%fr5,%fr6
	ldw		0(%r26),%r29
	fstds		%fr6,-16(%r30)
	ldw		-12(%r30),%r1		C low word of the product
	ldw		-16(%r30),%r28		C high word of the product
	sub		%r29,%r1,%r22
	add		%r22,%r1,%r0		C carry set iff the sub borrowed
	stw		%r22,0(%r26)
	addc		%r0,%r28,%r28		C high word plus borrow
	bv		0(%r2)			C return
	 ldo		-64(%r30),%r30		C (delay slot) release the frame
EPILOGUE()
|
||||
96
src/gmp/mpn/pa32/hppa1_1/udiv_qrnnd.asm
Normal file
96
src/gmp/mpn/pa32/hppa1_1/udiv_qrnnd.asm
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
dnl HP-PA __udiv_qrnnd division support, used from longlong.h.
|
||||
dnl This version runs fast on PA 7000 and later.
|
||||
|
||||
dnl Copyright 1993, 1994, 2000, 2001 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C rem_ptr gr26
|
||||
C n1 gr25
|
||||
C n0 gr24
|
||||
C d gr23
|
||||
|
||||
C This file has caused a lot of trouble, since it demands PIC reference to
|
||||
C static data, which triggers bugs in gas (at least version 2.7 through
|
||||
C 2.11.2). When the bug is triggered, many bogus relocs are generated. The
|
||||
C current solution is to stuff data right into the code, and refer to it using
|
||||
C absolute offsets. Fragile to be sure, but nothing else seems to work.
|
||||
|
||||
C mpn_udiv_qrnnd (rem_ptr, n1, n0, d)
C
C Divide the two-word value n1:n0 by d using the floating-point unit.
C The quotient is left in %r28 and the remainder is stored through rem_ptr.
C
C Outline:
C   1. Store n1:n0 as a doubleword and convert it to a double with fcnvxf.
C      That conversion is signed, so when n1 is negative as a signed word
C      the constant 2^64 is added to get the unsigned value.
C   2. Build d as a doubleword with a zero upper word and convert it too.
C   3. fdiv, convert the quotient back to an integer, keep its low word.
C   4. Multiply quotient by d with xmpyu and subtract from n1:n0.  If the
C      upper word of that difference is nonzero, add d to the remainder
C      and decrement the quotient.
C
C The constant 2^64 (0x43f00000, 0x0) lives in RODATA for non-PIC builds.
C For PIC builds it is embedded in the instruction stream and its address
C is derived from the return address that bl leaves in %r31.
C
C NOTE(review): only a single downward quotient correction is done.
C Confirm the operand preconditions in longlong.h before reuse.

ASM_START()
ifdef(`PIC',`',
`	RODATA
	INT64(L(0000), 0x43f00000, 0x0)	C 2^64
')

PROLOGUE(mpn_udiv_qrnnd)
	.proc
	.callinfo	frame=64,no_calls
	.entry

	ldo		64(%r30),%r30		C allocate a 64 byte frame

	stws		%r25,-16(0,%r30)	C n_hi
	stws		%r24,-12(0,%r30)	C n_lo

C Point %r31 at the 2^64 constant.  The PIC variant branches over three
C inline data words.  The dep in the bl delay slot clears the two low bits
C of the address left in %r31.
ifdef(`PIC',
`	bl		.+20,%r31
	dep		%r0,31,2,%r31
	.word		0x0			C padding for alignment
	.word		0x43f00000, 0x0		C 2^64
	ldo		4(%r31),%r31',
`	ldil		`L'%L(0000),%r31
	ldo		R%L(0000)(%r31),%r31')

	fldds		-16(0,%r30),%fr5	C n1:n0 as a 64-bit integer
	stws		%r23,-12(0,%r30)	C d, reloaded into fr6R below
	comib,<=	0,%r25,L(1)		C n1 non-negative: no fixup needed
	 fcnvxf,dbl,dbl	%fr5,%fr5		C (delay slot) signed int to double
	fldds		0(0,%r31),%fr4		C 2^64
	fadd,dbl	%fr4,%fr5,%fr5		C undo the signed interpretation

	.label	L(1)
	fcpy,sgl	%fr0,%fr6L		C upper word of the divisor
	fldws		-12(0,%r30),%fr6R	C lower word = d
	fcnvxf,dbl,dbl	%fr6,%fr4		C d as a double

	fdiv,dbl	%fr5,%fr4,%fr5		C n / d

	fcnvfx,dbl,dbl	%fr5,%fr4		C quotient back to integer
	fstws		%fr4R,-16(%r30)		C low word of the quotient
	xmpyu		%fr4R,%fr6R,%fr6	C q * d
	ldws		-16(%r30),%r28		C q
	fstds		%fr6,-16(0,%r30)	C spill q * d
	ldws		-12(0,%r30),%r21	C second word of q * d
	ldws		-16(0,%r30),%r20	C first word of q * d
	sub		%r24,%r21,%r22		C n0 minus the word at -12
	subb		%r25,%r20,%r20		C n1 minus the word at -16, with borrow
	comib,=		0,%r20,L(2)		C upper difference zero: q is final
	 ldo		-64(%r30),%r30		C (delay slot) release the frame

	add		%r22,%r23,%r22		C remainder += d
	ldo		-1(%r28),%r28		C q -= 1

	.label	L(2)
	bv		0(%r2)			C return
	 stws		%r22,0(0,%r26)		C (delay slot) store the remainder
	.exit
	.procend
EPILOGUE(mpn_udiv_qrnnd)
|
||||
38
src/gmp/mpn/pa32/hppa1_1/umul.asm
Normal file
38
src/gmp/mpn/pa32/hppa1_1/umul.asm
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
dnl Copyright 1999, 2001 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C mpn_umul_ppmm -- 32 x 32 -> 64 bit unsigned multiply.
C
C Register use:
C   %r26  pointer through which one word of the product is stored
C   %r25  first multiplicand
C   %r24  second multiplicand
C   %r28  word of the product kept in a register at return
C
C The operands are moved to the two halves of %fr22 through the scratch
C word at -16(%r30), multiplied with xmpyu, and the 64-bit product is
C spilled back as a doubleword.  The word at -16 is left in %r28 and the
C word at -12 is stored through %r26.
C NOTE(review): presumably -16 holds the high word and -12 the low word,
C matching the usual doubleword layout.  Confirm.

ASM_START()
PROLOGUE(mpn_umul_ppmm)
C	.callinfo	frame=64,no_calls

	ldo		64(%r30),%r30		C allocate a 64 byte frame
	stw		%r25,-16(0,%r30)	C first operand ...
	fldws		-16(0,%r30),%fr22R	C ... into the right half of fr22
	stw		%r24,-16(0,%r30)	C second operand ...
	fldws		-16(0,%r30),%fr22L	C ... into the left half of fr22
	xmpyu		%fr22R,%fr22L,%fr22	C 64-bit product
	fstds		%fr22,-16(0,%r30)	C spill the product
	ldw		-16(0,%r30),%r28	C first word, kept in %r28
	ldw		-12(0,%r30),%r29	C second word
	stw		%r29,0(0,%r26)		C stored through the pointer argument
	bv		0(%r2)			C return
	 ldo		-64(%r30),%r30		C (delay slot) release the frame
EPILOGUE()
|
||||
98
src/gmp/mpn/pa32/hppa2_0/add_n.asm
Normal file
98
src/gmp/mpn/pa32/hppa2_0/add_n.asm
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
dnl HP-PA 2.0 32-bit mpn_add_n -- Add two limb vectors of the same length > 0
|
||||
dnl and store sum in a third limb vector.
|
||||
|
||||
dnl Copyright 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C res_ptr gr26
|
||||
C s1_ptr gr25
|
||||
C s2_ptr gr24
|
||||
C size gr23
|
||||
|
||||
C This runs at 2 cycles/limb on PA8000.
|
||||
|
||||
C mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
C
C Register use:
C   %r26  res_ptr        %r25  s1_ptr
C   %r24  s2_ptr         %r23  size
C   %r20, %r21  limbs from s1 and the sums
C   %r31, %r19  limbs from s2
C   %r28  jump index on entry, carry-out at return
C
C The loop is unrolled eight times and entered in the middle.  With
C k = -size & 7, blr skips the first k four-instruction slices, so the
C first pass handles size mod 8 limbs (or eight when that is zero).  All
C three pointers are moved back by 4*k bytes first, so the displacement
C used by the first executed slice lands on the first limb.  Labels L(7)
C down to L(1) mark the slices and are not branch targets themselves.
C
C The carry bit links all addc instructions, including across passes, so
C the code relies on ldw, stw, ldo and addib leaving it unchanged.

ASM_START()
PROLOGUE(mpn_add_n)
	sub		%r0,%r23,%r22		C -size
	zdep		%r22,30,3,%r28	C r28 = 2 * (-n & 7)
	zdep		%r22,29,3,%r22	C r22 = 4 * (-n & 7)
	sub		%r25,%r22,%r25	C offset s1_ptr
	sub		%r24,%r22,%r24	C offset s2_ptr
	sub		%r26,%r22,%r26	C offset res_ptr
	blr		%r28,%r0	C branch into loop
	 add		%r0,%r0,%r0	C reset carry (delay slot)

	.label	L(loop)
	ldw		0(%r25),%r20
	ldw		0(%r24),%r31
	addc		%r20,%r31,%r20
	stw		%r20,0(%r26)

	.label	L(7)
	ldw		4(%r25),%r21
	ldw		4(%r24),%r19
	addc		%r21,%r19,%r21
	stw		%r21,4(%r26)

	.label	L(6)
	ldw		8(%r25),%r20
	ldw		8(%r24),%r31
	addc		%r20,%r31,%r20
	stw		%r20,8(%r26)

	.label	L(5)
	ldw		12(%r25),%r21
	ldw		12(%r24),%r19
	addc		%r21,%r19,%r21
	stw		%r21,12(%r26)

	.label	L(4)
	ldw		16(%r25),%r20
	ldw		16(%r24),%r31
	addc		%r20,%r31,%r20
	stw		%r20,16(%r26)

	.label	L(3)
	ldw		20(%r25),%r21
	ldw		20(%r24),%r19
	addc		%r21,%r19,%r21
	stw		%r21,20(%r26)

	.label	L(2)
	ldw		24(%r25),%r20
	ldw		24(%r24),%r31
	addc		%r20,%r31,%r20
	stw		%r20,24(%r26)

C Last slice also advances the three pointers by eight limbs.
	.label	L(1)
	ldw		28(%r25),%r21
	ldo		32(%r25),%r25
	ldw		28(%r24),%r19
	addc		%r21,%r19,%r21
	stw		%r21,28(%r26)
	ldo		32(%r24),%r24
	addib,>		-8,%r23,L(loop)		C size -= 8, loop while positive
	 ldo		32(%r26),%r26		C (delay slot) advance res_ptr

	bv		(%r2)			C return
	 addc		%r0,%r0,%r28		C (delay slot) r28 = carry-out
EPILOGUE()
|
||||
57
src/gmp/mpn/pa32/hppa2_0/gmp-mparam.h
Normal file
57
src/gmp/mpn/pa32/hppa2_0/gmp-mparam.h
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
/* gmp-mparam.h -- Compiler/machine parameter header file.
|
||||
|
||||
Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
MA 02111-1307, USA. */
|
||||
|
||||
/* Limb geometry for this target. */
#define BITS_PER_MP_LIMB 32
#define BYTES_PER_MP_LIMB 4

/* Generated by tuneup.c, 2001-02-18, gcc 2.95 */

/* The values below are tuned crossover points.  NOTE(review): presumably
   operand sizes in limbs at which a different algorithm is selected;
   confirm against the code that consumes each macro.  Do not edit by hand,
   regenerate with tuneup instead.  */

/* Multiplication. */
#define MUL_KARATSUBA_THRESHOLD     16
#define MUL_TOOM3_THRESHOLD        129

/* Squaring. */
#define SQR_BASECASE_THRESHOLD       6
#define SQR_KARATSUBA_THRESHOLD     48
#define SQR_TOOM3_THRESHOLD        153

/* Division and modular exponentiation. */
#define DIV_SB_PREINV_THRESHOLD      6
#define DIV_DC_THRESHOLD           102
#define POWM_THRESHOLD             166

/* GCD. */
#define GCD_ACCEL_THRESHOLD          4
#define GCDEXT_THRESHOLD             0

/* Single-limb and two-limb division helpers. */
#define DIVREM_1_NORM_THRESHOLD      4
#define DIVREM_1_UNNORM_THRESHOLD    6
#define MOD_1_NORM_THRESHOLD         4
#define MOD_1_UNNORM_THRESHOLD       6
#define USE_PREINV_MOD_1             0
#define DIVREM_2_THRESHOLD           0
#define DIVEXACT_1_THRESHOLD         0
#define MODEXACT_1_ODD_THRESHOLD     0

/* FFT multiplication; the table is terminated by 0. */
#define MUL_FFT_TABLE  { 656, 928, 1920, 3584, 14336, 24576, 0 }
#define MUL_FFT_MODF_THRESHOLD     584
#define MUL_FFT_THRESHOLD         3840

/* FFT squaring; the table is terminated by 0. */
#define SQR_FFT_TABLE  { 656, 928, 1920, 3584, 14336, 24576, 0 }
#define SQR_FFT_MODF_THRESHOLD     616
#define SQR_FFT_THRESHOLD         3840
|
||||
103
src/gmp/mpn/pa32/hppa2_0/sqr_diagonal.asm
Normal file
103
src/gmp/mpn/pa32/hppa2_0/sqr_diagonal.asm
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
dnl HP-PA 32-bit mpn_sqr_diagonal optimized for the PA8x00.
|
||||
|
||||
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C This code runs at 6 cycles/limb on the PA7100 and 2 cycles/limb on PA8x00.
|
||||
C The 2-way unrolling is actually not helping the PA7100.
|
||||
|
||||
C INPUT PARAMETERS
define(`rp',`%r26')
define(`up',`%r25')
define(`n',`%r24')

C mpn_sqr_diagonal (rp, up, n)
C
C For each of the n limbs read through up, form the 64-bit square with xmpyu
C and store it as two consecutive words through rp.  The right half of the
C product register goes to the lower address, the left half to the higher.
C
C The loop is unrolled twice and software pipelined.  At the top of L(loop)
C two finished products are pending in fr5 and fr7, and one loaded limb in
C fr4r has not been squared yet.  n is decremented once per loaded limb, so
C it reaches zero when the last limb has been fetched.  The exit paths then
C drain whatever is pending:
C   L(end1)   n = 1, nothing was pipelined
C   L(end2)   n = 2
C   L(end3)   n = 3
C   L(exite)  count ran out at the first addib inside the loop
C   L(exito)  count ran out at the second addib (fall through)
C
C rp is biased by 4 in the first delay slot so that store pairs can use the
C displacements -4 and 0.
C
C NOTE(review): the code appears to assume n >= 1.  Confirm against callers.

ASM_START()
PROLOGUE(mpn_sqr_diagonal)

	fldws,ma	4(up),%fr4r		C limb 0
	addib,=		-1,n,L(end1)
	 ldo		4(rp),rp		C (delay slot) bias rp by one word

	fldws,ma	4(up),%fr6r		C limb 1
	addib,=		-1,n,L(end2)
	 xmpyu		%fr4r,%fr4r,%fr5	C (delay slot) square limb 0

	fldws,ma	4(up),%fr4r		C limb 2
	addib,=		-1,n,L(end3)
	 xmpyu		%fr6r,%fr6r,%fr7	C (delay slot) square limb 1


	.label	L(loop)
	fldws,ma	4(up),%fr6r		C fetch a limb for the fr7 stream
	fstws		%fr5r,-4(rp)		C store pending fr5 product
	fstws,ma	%fr5l,8(rp)		C second word, then rp += 8
	addib,=		-1,n,L(exite)
	 xmpyu		%fr4r,%fr4r,%fr5	C (delay slot) square limb held in fr4r
	fldws,ma	4(up),%fr4r		C fetch a limb for the fr5 stream
	fstws		%fr7r,-4(rp)		C store pending fr7 product
	fstws,ma	%fr7l,8(rp)		C second word, then rp += 8
	addib,<>	-1,n,L(loop)
	 xmpyu		%fr6r,%fr6r,%fr7	C (delay slot) square limb held in fr6r

C Pending: fr5, fr7, and the unsquared limb in fr4r.
	.label	L(exito)
	fstws		%fr5r,-4(rp)
	fstws		%fr5l,0(rp)
	xmpyu		%fr4r,%fr4r,%fr5	C last square, fr5 is free again
	fstws		%fr7r,4(rp)
	fstws		%fr7l,8(rp)
	fstws,mb	%fr5r,12(rp)		C rp += 12, then store
	bv		0(%r2)			C return
	 fstws		%fr5l,4(rp)		C (delay slot) final store

C Pending: fr7, fr5, and the unsquared limb in fr6r.
	.label	L(exite)
	fstws		%fr7r,-4(rp)
	fstws		%fr7l,0(rp)
	xmpyu		%fr6r,%fr6r,%fr7	C last square, fr7 is free again
	fstws		%fr5r,4(rp)
	fstws		%fr5l,8(rp)
	fstws,mb	%fr7r,12(rp)		C rp += 12, then store
	bv		0(%r2)			C return
	 fstws		%fr7l,4(rp)		C (delay slot) final store

C n = 1: square the only limb and store it.
	.label	L(end1)
	xmpyu		%fr4r,%fr4r,%fr5
	fstws		%fr5r,-4(rp)
	bv		0(%r2)			C return
	 fstws,ma	%fr5l,8(rp)		C (delay slot) final store

C n = 2: fr5 is done, limb 1 in fr6r still has to be squared.
	.label	L(end2)
	xmpyu		%fr6r,%fr6r,%fr7
	fstws		%fr5r,-4(rp)
	fstws		%fr5l,0(rp)
	fstws		%fr7r,4(rp)
	bv		0(%r2)			C return
	 fstws		%fr7l,8(rp)		C (delay slot) final store

C n = 3: fr5 and fr7 are done, limb 2 in fr4r still has to be squared.
	.label	L(end3)
	fstws		%fr5r,-4(rp)
	fstws		%fr5l,0(rp)
	xmpyu		%fr4r,%fr4r,%fr5
	fstws		%fr7r,4(rp)
	fstws		%fr7l,8(rp)
	fstws,mb	%fr5r,12(rp)		C rp += 12, then store
	bv		0(%r2)			C return
	 fstws		%fr5l,4(rp)		C (delay slot) final store
EPILOGUE(mpn_sqr_diagonal)
|
||||
98
src/gmp/mpn/pa32/hppa2_0/sub_n.asm
Normal file
98
src/gmp/mpn/pa32/hppa2_0/sub_n.asm
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
dnl HP-PA 2.0 32-bit mpn_sub_n -- Subtract two limb vectors of the same
|
||||
dnl length > 0 and store difference in a third limb vector.
|
||||
|
||||
dnl Copyright 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
|
||||
|
||||
dnl This file is part of the GNU MP Library.
|
||||
|
||||
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
dnl MA 02111-1307, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C INPUT PARAMETERS
|
||||
C res_ptr gr26
|
||||
C s1_ptr gr25
|
||||
C s2_ptr gr24
|
||||
C size gr23
|
||||
|
||||
C This runs at 2 cycles/limb on PA8000.
|
||||
|
||||
C mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
C
C Register use:
C   %r26  res_ptr        %r25  s1_ptr
C   %r24  s2_ptr         %r23  size
C   %r20, %r21  limbs from s1 and the differences
C   %r31, %r19  limbs from s2
C   %r28  jump index on entry, borrow-out at return
C
C The loop is unrolled eight times and entered in the middle.  With
C k = -size & 7, blr skips the first k four-instruction slices, so the
C first pass handles size mod 8 limbs (or eight when that is zero).  All
C three pointers are moved back by 4*k bytes first, so the displacement
C used by the first executed slice lands on the first limb.  Labels L(7)
C down to L(1) mark the slices and are not branch targets themselves.
C
C subb consumes the carry bit as "no borrow".  It is set on entry by the
C pointer adjustment in the blr delay slot, and the code relies on ldw, stw,
C ldo and addib leaving it unchanged between the subb instructions.  At
C the end the carry is read with addc and turned into a borrow as 1 - carry.

ASM_START()
PROLOGUE(mpn_sub_n)
	sub		%r0,%r23,%r22		C -size
	zdep		%r22,30,3,%r28	C r28 = 2 * (-n & 7)
	zdep		%r22,29,3,%r22	C r22 = 4 * (-n & 7)
	sub		%r25,%r22,%r25	C offset s1_ptr
	sub		%r24,%r22,%r24	C offset s2_ptr
	blr		%r28,%r0	C branch into loop
	 sub		%r26,%r22,%r26	C offset res_ptr and set carry (delay slot)

	.label	L(loop)
	ldw		0(%r25),%r20
	ldw		0(%r24),%r31
	subb		%r20,%r31,%r20
	stw		%r20,0(%r26)

	.label	L(7)
	ldw		4(%r25),%r21
	ldw		4(%r24),%r19
	subb		%r21,%r19,%r21
	stw		%r21,4(%r26)

	.label	L(6)
	ldw		8(%r25),%r20
	ldw		8(%r24),%r31
	subb		%r20,%r31,%r20
	stw		%r20,8(%r26)

	.label	L(5)
	ldw		12(%r25),%r21
	ldw		12(%r24),%r19
	subb		%r21,%r19,%r21
	stw		%r21,12(%r26)

	.label	L(4)
	ldw		16(%r25),%r20
	ldw		16(%r24),%r31
	subb		%r20,%r31,%r20
	stw		%r20,16(%r26)

	.label	L(3)
	ldw		20(%r25),%r21
	ldw		20(%r24),%r19
	subb		%r21,%r19,%r21
	stw		%r21,20(%r26)

	.label	L(2)
	ldw		24(%r25),%r20
	ldw		24(%r24),%r31
	subb		%r20,%r31,%r20
	stw		%r20,24(%r26)

C Last slice also advances the three pointers by eight limbs.
	.label	L(1)
	ldw		28(%r25),%r21
	ldo		32(%r25),%r25
	ldw		28(%r24),%r19
	subb		%r21,%r19,%r21
	stw		%r21,28(%r26)
	ldo		32(%r24),%r24
	addib,>		-8,%r23,L(loop)		C size -= 8, loop while positive
	 ldo		32(%r26),%r26		C (delay slot) advance res_ptr

	addc		%r0,%r0,%r28		C r28 = carry, meaning no borrow
	bv		(%r2)			C return
	 subi		1,%r28,%r28		C (delay slot) r28 = 1 - carry = borrow
EPILOGUE()
|
||||
Loading…
Add table
Add a link
Reference in a new issue