Add more files from GMP 4.1.2

This commit is contained in:
jjgarcia 2003-10-30 10:39:24 +00:00
parent 4b11900381
commit 1d2c4cd6f0
17 changed files with 1611 additions and 0 deletions

View file

@ -0,0 +1,98 @@
dnl HP-PA 1.1 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
dnl result to a second limb vector.
dnl Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
dnl Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
C res_ptr r26
C s1_ptr r25
C size r24
C s2_limb r23
C This runs at 11 cycles/limb on a PA7000. With the used instructions, it can
C not become faster due to data cache contention after a store. On the PA7100
C it runs at 10 cycles/limb.
C There are some ideas described in mul_1.asm that applies to this code too.
ASM_START()
PROLOGUE(mpn_addmul_1)
C .callinfo frame=64,no_calls
ldo 64(%r30),%r30
fldws,ma 4(%r25),%fr5
stw %r23,-16(%r30) C move s2_limb ...
addib,= -1,%r24,L(just_one_limb)
fldws -16(%r30),%fr4 C ... into fr4
add %r0,%r0,%r0 C clear carry
xmpyu %fr4,%fr5,%fr6
fldws,ma 4(%r25),%fr7
fstds %fr6,-16(%r30)
xmpyu %fr4,%fr7,%fr8
ldw -12(%r30),%r19 C least significant limb in product
ldw -16(%r30),%r28
fstds %fr8,-16(%r30)
addib,= -1,%r24,L(end)
ldw -12(%r30),%r1
C Main loop
.label L(loop)
ldws 0(%r26),%r29
fldws,ma 4(%r25),%fr5
add %r29,%r19,%r19
stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
xmpyu %fr4,%fr5,%fr6
ldw -16(%r30),%r28
fstds %fr6,-16(%r30)
addc %r0,%r28,%r28
addib,<> -1,%r24,L(loop)
ldw -12(%r30),%r1
.label L(end)
ldw 0(%r26),%r29
add %r29,%r19,%r19
stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
ldw -16(%r30),%r28
ldws 0(%r26),%r29
addc %r0,%r28,%r28
add %r29,%r19,%r19
stws,ma %r19,4(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
.label L(just_one_limb)
xmpyu %fr4,%fr5,%fr6
ldw 0(%r26),%r29
fstds %fr6,-16(%r30)
ldw -12(%r30),%r1
ldw -16(%r30),%r28
add %r29,%r1,%r19
stw %r19,0(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
EPILOGUE()

View file

@ -0,0 +1,63 @@
/* HP-PA 1.1 gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#define BITS_PER_MP_LIMB 32
#define BYTES_PER_MP_LIMB 4
/* Generated by tuneup.c, 2002-03-07, gcc 2.8 (pa7100/100MHz) */
#define MUL_KARATSUBA_THRESHOLD 30
#define MUL_TOOM3_THRESHOLD 141
#define SQR_BASECASE_THRESHOLD 4
#define SQR_KARATSUBA_THRESHOLD 55
#define SQR_TOOM3_THRESHOLD 185
#define DIV_SB_PREINV_THRESHOLD 0 /* always */
#define DIV_DC_THRESHOLD 95
#define POWM_THRESHOLD 150
#define GCD_ACCEL_THRESHOLD 3
#define GCDEXT_THRESHOLD 0 /* always */
#define JACOBI_BASE_METHOD 2
#define DIVREM_1_NORM_THRESHOLD 3
#define DIVREM_1_UNNORM_THRESHOLD 6
#define MOD_1_NORM_THRESHOLD 3
#define MOD_1_UNNORM_THRESHOLD 6
#define USE_PREINV_DIVREM_1 1
#define USE_PREINV_MOD_1 1
#define DIVREM_2_THRESHOLD 0 /* always */
#define DIVEXACT_1_THRESHOLD 0 /* always */
#define MODEXACT_1_ODD_THRESHOLD 0 /* always */
#define GET_STR_DC_THRESHOLD 13
#define GET_STR_PRECOMPUTE_THRESHOLD 23
#define SET_STR_THRESHOLD 6589
#define MUL_FFT_TABLE { 592, 1440, 2688, 5632, 14336, 40960, 0 }
#define MUL_FFT_MODF_THRESHOLD 608
#define MUL_FFT_THRESHOLD 5888
#define SQR_FFT_TABLE { 624, 1504, 2688, 6656, 18432, 40960, 0 }
#define SQR_FFT_MODF_THRESHOLD 640
#define SQR_FFT_THRESHOLD 5376

View file

@ -0,0 +1,94 @@
dnl HP-PA 1.1 mpn_mul_1 -- Multiply a limb vector with a limb and store the
dnl result in a second limb vector.
dnl Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
dnl Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
C res_ptr r26
C s1_ptr r25
C size r24
C s2_limb r23
C This runs at 9 cycles/limb on a PA7000. With the used instructions, it can
C not become faster due to data cache contention after a store. On the PA7100
C it runs at 7 cycles/limb.
C We could use fldds to read two limbs at a time from the S1 array, and that
C could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
C PA7100, respectively. We don't do that since it does not seem worth the
C (alignment) troubles...
C At least the PA7100 is rumored to be able to deal with cache-misses without
C stalling instruction issue. If this is true, and the cache is actually also
C lockup-free, we should use a deeper software pipeline, and load from S1 very
C early! (The loads and stores to -12(sp) will surely be in the cache.)
ASM_START()
PROLOGUE(mpn_mul_1)
C .callinfo frame=64,no_calls
ldo 64(%r30),%r30
fldws,ma 4(%r25),%fr5
stw %r23,-16(%r30) C move s2_limb ...
addib,= -1,%r24,L(just_one_limb)
fldws -16(%r30),%fr4 C ... into fr4
add %r0,%r0,%r0 C clear carry
xmpyu %fr4,%fr5,%fr6
fldws,ma 4(%r25),%fr7
fstds %fr6,-16(%r30)
xmpyu %fr4,%fr7,%fr8
ldw -12(%r30),%r19 C least significant limb in product
ldw -16(%r30),%r28
fstds %fr8,-16(%r30)
addib,= -1,%r24,L(end)
ldw -12(%r30),%r1
C Main loop
.label L(loop)
fldws,ma 4(%r25),%fr5
stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
xmpyu %fr4,%fr5,%fr6
ldw -16(%r30),%r28
fstds %fr6,-16(%r30)
addib,<> -1,%r24,L(loop)
ldw -12(%r30),%r1
.label L(end)
stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
ldw -16(%r30),%r28
stws,ma %r19,4(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
.label L(just_one_limb)
xmpyu %fr4,%fr5,%fr6
fstds %fr6,-16(%r30)
ldw -16(%r30),%r28
ldo -64(%r30),%r30
bv 0(%r2)
fstws %fr6R,0(%r26)
EPILOGUE()

View file

@ -0,0 +1,74 @@
dnl HP-PA mpn_add_n -- Add two limb vectors of the same length > 0 and store
dnl sum in a third limb vector. Optimized for the PA7100, where is runs at
dnl 4.25 cycles/limb.
dnl Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
C res_ptr r26
C s1_ptr r25
C s2_ptr r24
C size r23
ASM_START()
PROLOGUE(mpn_add_n)
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
addib,<= -5,%r23,L(rest)
add %r20,%r19,%r28 C add first limbs ignoring cy
.label L(loop)
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
addc %r20,%r19,%r28
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
addc %r20,%r19,%r28
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
addc %r20,%r19,%r28
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
addib,> -4,%r23,L(loop)
addc %r20,%r19,%r28
.label L(rest)
addib,= 4,%r23,L(end)
nop
.label L(eloop)
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
addib,> -1,%r23,L(eloop)
addc %r20,%r19,%r28
.label L(end)
stws %r28,0(0,%r26)
bv 0(%r2)
addc %r0,%r0,%r28
EPILOGUE()

View file

@ -0,0 +1,192 @@
dnl HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and
dnl add the result to a second limb vector.
dnl Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
define(`res_ptr',`%r26')
define(`s1_ptr',`%r25')
define(`size_param',`%r24')
define(`s2_limb',`%r23')
define(`cylimb',`%r28')
define(`s0',`%r19')
define(`s1',`%r20')
define(`s2',`%r3')
define(`s3',`%r4')
define(`lo0',`%r21')
define(`lo1',`%r5')
define(`lo2',`%r6')
define(`lo3',`%r7')
define(`hi0',`%r22')
define(`hi1',`%r23') C safe to reuse
define(`hi2',`%r29')
define(`hi3',`%r1')
ASM_START()
PROLOGUE(mpn_addmul_1)
C .callinfo frame=128,no_calls
ldo 128(%r30),%r30
stws s2_limb,-16(%r30)
add %r0,%r0,cylimb C clear cy and cylimb
addib,< -4,size_param,L(few_limbs)
fldws -16(%r30),%fr31R
ldo -112(%r30),%r31
stw %r3,-96(%r30)
stw %r4,-92(%r30)
stw %r5,-88(%r30)
stw %r6,-84(%r30)
stw %r7,-80(%r30)
bb,>=,n s1_ptr,29,L(0)
fldws,ma 4(s1_ptr),%fr4
ldws 0(res_ptr),s0
xmpyu %fr4,%fr31R,%fr5
fstds %fr5,-16(%r31)
ldws -16(%r31),cylimb
ldws -12(%r31),lo0
add s0,lo0,s0
addib,< -1,size_param,L(few_limbs)
stws,ma s0,4(res_ptr)
C start software pipeline ----------------------------------------------------
.label L(0)
fldds,ma 8(s1_ptr),%fr4
fldds,ma 8(s1_ptr),%fr8
xmpyu %fr4L,%fr31R,%fr5
xmpyu %fr4R,%fr31R,%fr6
xmpyu %fr8L,%fr31R,%fr9
xmpyu %fr8R,%fr31R,%fr10
fstds %fr5,-16(%r31)
fstds %fr6,-8(%r31)
fstds %fr9,0(%r31)
fstds %fr10,8(%r31)
ldws -16(%r31),hi0
ldws -12(%r31),lo0
ldws -8(%r31),hi1
ldws -4(%r31),lo1
ldws 0(%r31),hi2
ldws 4(%r31),lo2
ldws 8(%r31),hi3
ldws 12(%r31),lo3
addc lo0,cylimb,lo0
addc lo1,hi0,lo1
addc lo2,hi1,lo2
addc lo3,hi2,lo3
addib,< -4,size_param,L(end)
addc %r0,hi3,cylimb C propagate carry into cylimb
C main loop ------------------------------------------------------------------
.label L(loop)
fldds,ma 8(s1_ptr),%fr4
fldds,ma 8(s1_ptr),%fr8
ldws 0(res_ptr),s0
xmpyu %fr4L,%fr31R,%fr5
ldws 4(res_ptr),s1
xmpyu %fr4R,%fr31R,%fr6
ldws 8(res_ptr),s2
xmpyu %fr8L,%fr31R,%fr9
ldws 12(res_ptr),s3
xmpyu %fr8R,%fr31R,%fr10
fstds %fr5,-16(%r31)
add s0,lo0,s0
fstds %fr6,-8(%r31)
addc s1,lo1,s1
fstds %fr9,0(%r31)
addc s2,lo2,s2
fstds %fr10,8(%r31)
addc s3,lo3,s3
ldws -16(%r31),hi0
ldws -12(%r31),lo0
ldws -8(%r31),hi1
ldws -4(%r31),lo1
ldws 0(%r31),hi2
ldws 4(%r31),lo2
ldws 8(%r31),hi3
ldws 12(%r31),lo3
addc lo0,cylimb,lo0
stws,ma s0,4(res_ptr)
addc lo1,hi0,lo1
stws,ma s1,4(res_ptr)
addc lo2,hi1,lo2
stws,ma s2,4(res_ptr)
addc lo3,hi2,lo3
stws,ma s3,4(res_ptr)
addib,>= -4,size_param,L(loop)
addc %r0,hi3,cylimb C propagate carry into cylimb
C finish software pipeline ---------------------------------------------------
.label L(end)
ldws 0(res_ptr),s0
ldws 4(res_ptr),s1
ldws 8(res_ptr),s2
ldws 12(res_ptr),s3
add s0,lo0,s0
stws,ma s0,4(res_ptr)
addc s1,lo1,s1
stws,ma s1,4(res_ptr)
addc s2,lo2,s2
stws,ma s2,4(res_ptr)
addc s3,lo3,s3
stws,ma s3,4(res_ptr)
C restore callee-saves registers ---------------------------------------------
ldw -96(%r30),%r3
ldw -92(%r30),%r4
ldw -88(%r30),%r5
ldw -84(%r30),%r6
ldw -80(%r30),%r7
.label L(few_limbs)
addib,=,n 4,size_param,L(ret)
.label L(loop2)
fldws,ma 4(s1_ptr),%fr4
ldws 0(res_ptr),s0
xmpyu %fr4,%fr31R,%fr5
fstds %fr5,-16(%r30)
ldws -16(%r30),hi0
ldws -12(%r30),lo0
addc lo0,cylimb,lo0
addc %r0,hi0,cylimb
add s0,lo0,s0
stws,ma s0,4(res_ptr)
addib,<> -1,size_param,L(loop2)
nop
.label L(ret)
addc %r0,cylimb,cylimb
bv 0(%r2)
ldo -128(%r30),%r30
EPILOGUE(mpn_addmul_1)

View file

@ -0,0 +1,86 @@
dnl HP-PA mpn_lshift -- Shift a number left.
dnl Optimized for the PA7100, where is runs at 3.25 cycles/limb.
dnl Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
C res_ptr r26
C s_ptr r25
C size r24
C cnt r23
ASM_START()
PROLOGUE(mpn_lshift)
sh2add %r24,%r25,%r25
sh2add %r24,%r26,%r26
ldws,mb -4(0,%r25),%r22
subi 32,%r23,%r1
mtsar %r1
addib,= -1,%r24,L(0004)
vshd %r0,%r22,%r28 C compute carry out limb
ldws,mb -4(0,%r25),%r29
addib,<= -5,%r24,L(rest)
vshd %r22,%r29,%r20
.label L(loop)
ldws,mb -4(0,%r25),%r22
stws,mb %r20,-4(0,%r26)
vshd %r29,%r22,%r20
ldws,mb -4(0,%r25),%r29
stws,mb %r20,-4(0,%r26)
vshd %r22,%r29,%r20
ldws,mb -4(0,%r25),%r22
stws,mb %r20,-4(0,%r26)
vshd %r29,%r22,%r20
ldws,mb -4(0,%r25),%r29
stws,mb %r20,-4(0,%r26)
addib,> -4,%r24,L(loop)
vshd %r22,%r29,%r20
.label L(rest)
addib,= 4,%r24,L(end1)
nop
.label L(eloop)
ldws,mb -4(0,%r25),%r22
stws,mb %r20,-4(0,%r26)
addib,<= -1,%r24,L(end2)
vshd %r29,%r22,%r20
ldws,mb -4(0,%r25),%r29
stws,mb %r20,-4(0,%r26)
addib,> -1,%r24,L(eloop)
vshd %r22,%r29,%r20
.label L(end1)
stws,mb %r20,-4(0,%r26)
vshd %r29,%r0,%r20
bv 0(%r2)
stw %r20,-4(0,%r26)
.label L(end2)
stws,mb %r20,-4(0,%r26)
.label L(0004)
vshd %r22,%r0,%r20
bv 0(%r2)
stw %r20,-4(0,%r26)
EPILOGUE()

View file

@ -0,0 +1,83 @@
dnl HP-PA mpn_rshift -- Shift a number right.
dnl Optimized for the PA7100, where is runs at 3.25 cycles/limb.
dnl Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
C res_ptr r26
C s_ptr r25
C size r24
C cnt r23
ASM_START()
PROLOGUE(mpn_rshift)
ldws,ma 4(0,%r25),%r22
mtsar %r23
addib,= -1,%r24,L(0004)
vshd %r22,%r0,%r28 C compute carry out limb
ldws,ma 4(0,%r25),%r29
addib,<= -5,%r24,L(rest)
vshd %r29,%r22,%r20
.label L(loop)
ldws,ma 4(0,%r25),%r22
stws,ma %r20,4(0,%r26)
vshd %r22,%r29,%r20
ldws,ma 4(0,%r25),%r29
stws,ma %r20,4(0,%r26)
vshd %r29,%r22,%r20
ldws,ma 4(0,%r25),%r22
stws,ma %r20,4(0,%r26)
vshd %r22,%r29,%r20
ldws,ma 4(0,%r25),%r29
stws,ma %r20,4(0,%r26)
addib,> -4,%r24,L(loop)
vshd %r29,%r22,%r20
.label L(rest)
addib,= 4,%r24,L(end1)
nop
.label L(eloop)
ldws,ma 4(0,%r25),%r22
stws,ma %r20,4(0,%r26)
addib,<= -1,%r24,L(end2)
vshd %r22,%r29,%r20
ldws,ma 4(0,%r25),%r29
stws,ma %r20,4(0,%r26)
addib,> -1,%r24,L(eloop)
vshd %r29,%r22,%r20
.label L(end1)
stws,ma %r20,4(0,%r26)
vshd %r0,%r29,%r20
bv 0(%r2)
stw %r20,0(0,%r26)
.label L(end2)
stws,ma %r20,4(0,%r26)
.label L(0004)
vshd %r0,%r22,%r20
bv 0(%r2)
stw %r20,0(0,%r26)
EPILOGUE()

View file

@ -0,0 +1,75 @@
dnl HP-PA mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
dnl store difference in a third limb vector. Optimized for the PA7100, where
dnl is runs at 4.25 cycles/limb.
dnl Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
C res_ptr r26
C s1_ptr r25
C s2_ptr r24
C size r23
ASM_START()
PROLOGUE(mpn_sub_n)
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
addib,<= -5,%r23,L(rest)
sub %r20,%r19,%r28 C subtract first limbs ignoring cy
.label L(loop)
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
subb %r20,%r19,%r28
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
subb %r20,%r19,%r28
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
subb %r20,%r19,%r28
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
addib,> -4,%r23,L(loop)
subb %r20,%r19,%r28
.label L(rest)
addib,= 4,%r23,L(end)
nop
.label L(eloop)
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
addib,> -1,%r23,L(eloop)
subb %r20,%r19,%r28
.label L(end)
stws %r28,0(0,%r26)
addc %r0,%r0,%r28
bv 0(%r2)
subi 1,%r28,%r28
EPILOGUE()

View file

@ -0,0 +1,198 @@
dnl HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
dnl subtract the result from a second limb vector.
dnl Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
define(`res_ptr',`%r26')
define(`s1_ptr',`%r25')
define(`size_param',`%r24')
define(`s2_limb',`%r23')
define(`cylimb',`%r28')
define(`s0',`%r19')
define(`s1',`%r20')
define(`s2',`%r3')
define(`s3',`%r4')
define(`lo0',`%r21')
define(`lo1',`%r5')
define(`lo2',`%r6')
define(`lo3',`%r7')
define(`hi0',`%r22')
define(`hi1',`%r23') C safe to reuse
define(`hi2',`%r29')
define(`hi3',`%r1')
ASM_START()
PROLOGUE(mpn_submul_1)
C .callinfo frame=128,no_calls
ldo 128(%r30),%r30
stws s2_limb,-16(%r30)
add %r0,%r0,cylimb C clear cy and cylimb
addib,< -4,size_param,L(few_limbs)
fldws -16(%r30),%fr31R
ldo -112(%r30),%r31
stw %r3,-96(%r30)
stw %r4,-92(%r30)
stw %r5,-88(%r30)
stw %r6,-84(%r30)
stw %r7,-80(%r30)
bb,>=,n s1_ptr,29,L(0)
fldws,ma 4(s1_ptr),%fr4
ldws 0(res_ptr),s0
xmpyu %fr4,%fr31R,%fr5
fstds %fr5,-16(%r31)
ldws -16(%r31),cylimb
ldws -12(%r31),lo0
sub s0,lo0,s0
add s0,lo0,%r0 C invert cy
addib,< -1,size_param,L(few_limbs)
stws,ma s0,4(res_ptr)
C start software pipeline ----------------------------------------------------
.label L(0)
fldds,ma 8(s1_ptr),%fr4
fldds,ma 8(s1_ptr),%fr8
xmpyu %fr4L,%fr31R,%fr5
xmpyu %fr4R,%fr31R,%fr6
xmpyu %fr8L,%fr31R,%fr9
xmpyu %fr8R,%fr31R,%fr10
fstds %fr5,-16(%r31)
fstds %fr6,-8(%r31)
fstds %fr9,0(%r31)
fstds %fr10,8(%r31)
ldws -16(%r31),hi0
ldws -12(%r31),lo0
ldws -8(%r31),hi1
ldws -4(%r31),lo1
ldws 0(%r31),hi2
ldws 4(%r31),lo2
ldws 8(%r31),hi3
ldws 12(%r31),lo3
addc lo0,cylimb,lo0
addc lo1,hi0,lo1
addc lo2,hi1,lo2
addc lo3,hi2,lo3
addib,< -4,size_param,L(end)
addc %r0,hi3,cylimb C propagate carry into cylimb
C main loop ------------------------------------------------------------------
.label L(loop)
fldds,ma 8(s1_ptr),%fr4
fldds,ma 8(s1_ptr),%fr8
ldws 0(res_ptr),s0
xmpyu %fr4L,%fr31R,%fr5
ldws 4(res_ptr),s1
xmpyu %fr4R,%fr31R,%fr6
ldws 8(res_ptr),s2
xmpyu %fr8L,%fr31R,%fr9
ldws 12(res_ptr),s3
xmpyu %fr8R,%fr31R,%fr10
fstds %fr5,-16(%r31)
sub s0,lo0,s0
fstds %fr6,-8(%r31)
subb s1,lo1,s1
fstds %fr9,0(%r31)
subb s2,lo2,s2
fstds %fr10,8(%r31)
subb s3,lo3,s3
subb %r0,%r0,lo0 C these two insns ...
add lo0,lo0,%r0 C ... just invert cy
ldws -16(%r31),hi0
ldws -12(%r31),lo0
ldws -8(%r31),hi1
ldws -4(%r31),lo1
ldws 0(%r31),hi2
ldws 4(%r31),lo2
ldws 8(%r31),hi3
ldws 12(%r31),lo3
addc lo0,cylimb,lo0
stws,ma s0,4(res_ptr)
addc lo1,hi0,lo1
stws,ma s1,4(res_ptr)
addc lo2,hi1,lo2
stws,ma s2,4(res_ptr)
addc lo3,hi2,lo3
stws,ma s3,4(res_ptr)
addib,>= -4,size_param,L(loop)
addc %r0,hi3,cylimb C propagate carry into cylimb
C finish software pipeline ---------------------------------------------------
.label L(end)
ldws 0(res_ptr),s0
ldws 4(res_ptr),s1
ldws 8(res_ptr),s2
ldws 12(res_ptr),s3
sub s0,lo0,s0
stws,ma s0,4(res_ptr)
subb s1,lo1,s1
stws,ma s1,4(res_ptr)
subb s2,lo2,s2
stws,ma s2,4(res_ptr)
subb s3,lo3,s3
stws,ma s3,4(res_ptr)
subb %r0,%r0,lo0 C these two insns ...
add lo0,lo0,%r0 C ... invert cy
C restore callee-saves registers ---------------------------------------------
ldw -96(%r30),%r3
ldw -92(%r30),%r4
ldw -88(%r30),%r5
ldw -84(%r30),%r6
ldw -80(%r30),%r7
.label L(few_limbs)
addib,=,n 4,size_param,L(ret)
.label L(loop2)
fldws,ma 4(s1_ptr),%fr4
ldws 0(res_ptr),s0
xmpyu %fr4,%fr31R,%fr5
fstds %fr5,-16(%r30)
ldws -16(%r30),hi0
ldws -12(%r30),lo0
addc lo0,cylimb,lo0
addc %r0,hi0,cylimb
sub s0,lo0,s0
add s0,lo0,%r0 C invert cy
stws,ma s0,4(res_ptr)
addib,<> -1,size_param,L(loop2)
nop
.label L(ret)
addc %r0,cylimb,cylimb
bv 0(%r2)
ldo -128(%r30),%r30
EPILOGUE(mpn_submul_1)

View file

@ -0,0 +1,51 @@
dnl HP-PA 1.1 32-bit mpn_sqr_diagonal.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C This code runs at 6 cycles/limb on the PA7100 and 2.5 cycles/limb on PA8x00.
C 2-way unrolling wouldn't help the PA7100; it could however bring times down
C to 2.0 cycles/limb for the PA8x00.
C INPUT PARAMETERS
define(`rp',`%r26')
define(`up',`%r25')
define(`n',`%r24')
ASM_START()
PROLOGUE(mpn_sqr_diagonal)
ldo 4(rp),rp
fldws,ma 4(up),%fr4r
addib,= -1,n,L(exit)
xmpyu %fr4r,%fr4r,%fr5
.label L(loop)
fldws,ma 4(up),%fr4r
fstws %fr5r,-4(rp)
fstws,ma %fr5l,8(rp)
addib,<> -1,n,L(loop)
xmpyu %fr4r,%fr4r,%fr5
.label L(exit)
fstws %fr5r,-4(rp)
bv 0(%r2)
fstws %fr5l,0(rp)
EPILOGUE(mpn_sqr_diagonal)

View file

@ -0,0 +1,107 @@
dnl HP-PA 1.1 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
dnl the result from a second limb vector.
dnl Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
dnl Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
C res_ptr r26
C s1_ptr r25
C size r24
C s2_limb r23
C This runs at 12 cycles/limb on a PA7000. With the used instructions, it can
C not become faster due to data cache contention after a store. On the PA7100
C it runs at 11 cycles/limb.
C There are some ideas described in mul_1.asm that applies to this code too.
C It seems possible to make this run as fast as mpn_addmul_1, if we use
C sub,>>= %r29,%r19,%r22
C addi 1,%r28,%r28
C but that requires reworking the hairy software pipeline...
ASM_START()
PROLOGUE(mpn_submul_1)
C .callinfo frame=64,no_calls
ldo 64(%r30),%r30
fldws,ma 4(%r25),%fr5
stw %r23,-16(%r30) C move s2_limb ...
addib,= -1,%r24,L(just_one_limb)
fldws -16(%r30),%fr4 C ... into fr4
add %r0,%r0,%r0 C clear carry
xmpyu %fr4,%fr5,%fr6
fldws,ma 4(%r25),%fr7
fstds %fr6,-16(%r30)
xmpyu %fr4,%fr7,%fr8
ldw -12(%r30),%r19 C least significant limb in product
ldw -16(%r30),%r28
fstds %fr8,-16(%r30)
addib,= -1,%r24,L(end)
ldw -12(%r30),%r1
C Main loop
.label L(loop)
ldws 0(%r26),%r29
fldws,ma 4(%r25),%fr5
sub %r29,%r19,%r22
add %r22,%r19,%r0
stws,ma %r22,4(%r26)
addc %r28,%r1,%r19
xmpyu %fr4,%fr5,%fr6
ldw -16(%r30),%r28
fstds %fr6,-16(%r30)
addc %r0,%r28,%r28
addib,<> -1,%r24,L(loop)
ldw -12(%r30),%r1
.label L(end)
ldw 0(%r26),%r29
sub %r29,%r19,%r22
add %r22,%r19,%r0
stws,ma %r22,4(%r26)
addc %r28,%r1,%r19
ldw -16(%r30),%r28
ldws 0(%r26),%r29
addc %r0,%r28,%r28
sub %r29,%r19,%r22
add %r22,%r19,%r0
stws,ma %r22,4(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
.label L(just_one_limb)
xmpyu %fr4,%fr5,%fr6
ldw 0(%r26),%r29
fstds %fr6,-16(%r30)
ldw -12(%r30),%r1
ldw -16(%r30),%r28
sub %r29,%r1,%r22
add %r22,%r1,%r0
stw %r22,0(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
EPILOGUE()

View file

@ -0,0 +1,96 @@
dnl HP-PA __udiv_qrnnd division support, used from longlong.h.
dnl This version runs fast on PA 7000 and later.
dnl Copyright 1993, 1994, 2000, 2001 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
C rem_ptr gr26
C n1 gr25
C n0 gr24
C d gr23
C This file has caused a lot of trouble, since it demands PIC reference to
C static data, which triggers bugs in gas (at least version 2.7 through
C 2.11.2). When the bug is triggered, many bogus relocs are generated. The
C current solution is to stuff data right into the code, and refer it using
C absolute offsets. Fragile to be sure, but nothing else seems to work.
ASM_START()
ifdef(`PIC',`',
` RODATA
INT64(L(0000), 0x43f00000, 0x0) C 2^64
')
PROLOGUE(mpn_udiv_qrnnd)
.proc
.callinfo frame=64,no_calls
.entry
ldo 64(%r30),%r30
stws %r25,-16(0,%r30) C n_hi
stws %r24,-12(0,%r30) C n_lo
ifdef(`PIC',
` bl .+20,%r31
dep %r0,31,2,%r31
.word 0x0 C padding for alignment
.word 0x43f00000, 0x0 C 2^64
ldo 4(%r31),%r31',
` ldil `L'%L(0000),%r31
ldo R%L(0000)(%r31),%r31')
fldds -16(0,%r30),%fr5
stws %r23,-12(0,%r30)
comib,<= 0,%r25,L(1)
fcnvxf,dbl,dbl %fr5,%fr5
fldds 0(0,%r31),%fr4
fadd,dbl %fr4,%fr5,%fr5
.label L(1)
fcpy,sgl %fr0,%fr6L
fldws -12(0,%r30),%fr6R
fcnvxf,dbl,dbl %fr6,%fr4
fdiv,dbl %fr5,%fr4,%fr5
fcnvfx,dbl,dbl %fr5,%fr4
fstws %fr4R,-16(%r30)
xmpyu %fr4R,%fr6R,%fr6
ldws -16(%r30),%r28
fstds %fr6,-16(0,%r30)
ldws -12(0,%r30),%r21
ldws -16(0,%r30),%r20
sub %r24,%r21,%r22
subb %r25,%r20,%r20
comib,= 0,%r20,L(2)
ldo -64(%r30),%r30
add %r22,%r23,%r22
ldo -1(%r28),%r28
.label L(2)
bv 0(%r2)
stws %r22,0(0,%r26)
.exit
.procend
EPILOGUE(mpn_udiv_qrnnd)

View file

@ -0,0 +1,38 @@
dnl Copyright 1999, 2001 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
ASM_START()
PROLOGUE(mpn_umul_ppmm)
C .callinfo frame=64,no_calls
ldo 64(%r30),%r30
stw %r25,-16(0,%r30)
fldws -16(0,%r30),%fr22R
stw %r24,-16(0,%r30)
fldws -16(0,%r30),%fr22L
xmpyu %fr22R,%fr22L,%fr22
fstds %fr22,-16(0,%r30)
ldw -16(0,%r30),%r28
ldw -12(0,%r30),%r29
stw %r29,0(0,%r26)
bv 0(%r2)
ldo -64(%r30),%r30
EPILOGUE()

View file

@ -0,0 +1,98 @@
dnl HP-PA 2.0 32-bit mpn_add_n -- Add two limb vectors of the same length > 0
dnl and store sum in a third limb vector.
dnl Copyright 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
C res_ptr gr26
C s1_ptr gr25
C s2_ptr gr24
C size gr23
C This runs at 2 cycles/limb on PA8000.
ASM_START()
PROLOGUE(mpn_add_n)
sub %r0,%r23,%r22
zdep %r22,30,3,%r28 C r28 = 2 * (-n & 7)
zdep %r22,29,3,%r22 C r22 = 4 * (-n & 7)
sub %r25,%r22,%r25 C offset s1_ptr
sub %r24,%r22,%r24 C offset s2_ptr
sub %r26,%r22,%r26 C offset res_ptr
blr %r28,%r0 C branch into loop
add %r0,%r0,%r0 C reset carry
.label L(loop)
ldw 0(%r25),%r20
ldw 0(%r24),%r31
addc %r20,%r31,%r20
stw %r20,0(%r26)
.label L(7)
ldw 4(%r25),%r21
ldw 4(%r24),%r19
addc %r21,%r19,%r21
stw %r21,4(%r26)
.label L(6)
ldw 8(%r25),%r20
ldw 8(%r24),%r31
addc %r20,%r31,%r20
stw %r20,8(%r26)
.label L(5)
ldw 12(%r25),%r21
ldw 12(%r24),%r19
addc %r21,%r19,%r21
stw %r21,12(%r26)
.label L(4)
ldw 16(%r25),%r20
ldw 16(%r24),%r31
addc %r20,%r31,%r20
stw %r20,16(%r26)
.label L(3)
ldw 20(%r25),%r21
ldw 20(%r24),%r19
addc %r21,%r19,%r21
stw %r21,20(%r26)
.label L(2)
ldw 24(%r25),%r20
ldw 24(%r24),%r31
addc %r20,%r31,%r20
stw %r20,24(%r26)
.label L(1)
ldw 28(%r25),%r21
ldo 32(%r25),%r25
ldw 28(%r24),%r19
addc %r21,%r19,%r21
stw %r21,28(%r26)
ldo 32(%r24),%r24
addib,> -8,%r23,L(loop)
ldo 32(%r26),%r26
bv (%r2)
addc %r0,%r0,%r28
EPILOGUE()

View file

@ -0,0 +1,57 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#define BITS_PER_MP_LIMB 32
#define BYTES_PER_MP_LIMB 4
/* Generated by tuneup.c, 2001-02-18, gcc 2.95 */
#define MUL_KARATSUBA_THRESHOLD 16
#define MUL_TOOM3_THRESHOLD 129
#define SQR_BASECASE_THRESHOLD 6
#define SQR_KARATSUBA_THRESHOLD 48
#define SQR_TOOM3_THRESHOLD 153
#define DIV_SB_PREINV_THRESHOLD 6
#define DIV_DC_THRESHOLD 102
#define POWM_THRESHOLD 166
#define GCD_ACCEL_THRESHOLD 4
#define GCDEXT_THRESHOLD 0
#define DIVREM_1_NORM_THRESHOLD 4
#define DIVREM_1_UNNORM_THRESHOLD 6
#define MOD_1_NORM_THRESHOLD 4
#define MOD_1_UNNORM_THRESHOLD 6
#define USE_PREINV_MOD_1 0
#define DIVREM_2_THRESHOLD 0
#define DIVEXACT_1_THRESHOLD 0
#define MODEXACT_1_ODD_THRESHOLD 0
#define MUL_FFT_TABLE { 656, 928, 1920, 3584, 14336, 24576, 0 }
#define MUL_FFT_MODF_THRESHOLD 584
#define MUL_FFT_THRESHOLD 3840
#define SQR_FFT_TABLE { 656, 928, 1920, 3584, 14336, 24576, 0 }
#define SQR_FFT_MODF_THRESHOLD 616
#define SQR_FFT_THRESHOLD 3840

View file

@ -0,0 +1,103 @@
dnl HP-PA 32-bit mpn_sqr_diagonal optimized for the PA8x00.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C This code runs at 6 cycles/limb on the PA7100 and 2 cycles/limb on PA8x00.
C The 2-way unrolling is actually not helping the PA7100.
C INPUT PARAMETERS
define(`rp',`%r26')
define(`up',`%r25')
define(`n',`%r24')
ASM_START()
PROLOGUE(mpn_sqr_diagonal)
fldws,ma 4(up),%fr4r
addib,= -1,n,L(end1)
ldo 4(rp),rp
fldws,ma 4(up),%fr6r
addib,= -1,n,L(end2)
xmpyu %fr4r,%fr4r,%fr5
fldws,ma 4(up),%fr4r
addib,= -1,n,L(end3)
xmpyu %fr6r,%fr6r,%fr7
.label L(loop)
fldws,ma 4(up),%fr6r
fstws %fr5r,-4(rp)
fstws,ma %fr5l,8(rp)
addib,= -1,n,L(exite)
xmpyu %fr4r,%fr4r,%fr5
fldws,ma 4(up),%fr4r
fstws %fr7r,-4(rp)
fstws,ma %fr7l,8(rp)
addib,<> -1,n,L(loop)
xmpyu %fr6r,%fr6r,%fr7
.label L(exito)
fstws %fr5r,-4(rp)
fstws %fr5l,0(rp)
xmpyu %fr4r,%fr4r,%fr5
fstws %fr7r,4(rp)
fstws %fr7l,8(rp)
fstws,mb %fr5r,12(rp)
bv 0(%r2)
fstws %fr5l,4(rp)
.label L(exite)
fstws %fr7r,-4(rp)
fstws %fr7l,0(rp)
xmpyu %fr6r,%fr6r,%fr7
fstws %fr5r,4(rp)
fstws %fr5l,8(rp)
fstws,mb %fr7r,12(rp)
bv 0(%r2)
fstws %fr7l,4(rp)
.label L(end1)
xmpyu %fr4r,%fr4r,%fr5
fstws %fr5r,-4(rp)
bv 0(%r2)
fstws,ma %fr5l,8(rp)
.label L(end2)
xmpyu %fr6r,%fr6r,%fr7
fstws %fr5r,-4(rp)
fstws %fr5l,0(rp)
fstws %fr7r,4(rp)
bv 0(%r2)
fstws %fr7l,8(rp)
.label L(end3)
fstws %fr5r,-4(rp)
fstws %fr5l,0(rp)
xmpyu %fr4r,%fr4r,%fr5
fstws %fr7r,4(rp)
fstws %fr7l,8(rp)
fstws,mb %fr5r,12(rp)
bv 0(%r2)
fstws %fr5l,4(rp)
EPILOGUE(mpn_sqr_diagonal)

View file

@ -0,0 +1,98 @@
dnl HP-PA 2.0 32-bit mpn_sub_n -- Subtract two limb vectors of the same
dnl length > 0 and store difference in a third limb vector.
dnl Copyright 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
dnl MA 02111-1307, USA.
include(`../config.m4')
C INPUT PARAMETERS
C res_ptr gr26
C s1_ptr gr25
C s2_ptr gr24
C size gr23
C This runs at 2 cycles/limb on PA8000.
ASM_START()
PROLOGUE(mpn_sub_n)
sub %r0,%r23,%r22
zdep %r22,30,3,%r28 C r28 = 2 * (-n & 7)
zdep %r22,29,3,%r22 C r22 = 4 * (-n & 7)
sub %r25,%r22,%r25 C offset s1_ptr
sub %r24,%r22,%r24 C offset s2_ptr
blr %r28,%r0 C branch into loop
sub %r26,%r22,%r26 C offset res_ptr and set carry
.label L(loop)
ldw 0(%r25),%r20
ldw 0(%r24),%r31
subb %r20,%r31,%r20
stw %r20,0(%r26)
.label L(7)
ldw 4(%r25),%r21
ldw 4(%r24),%r19
subb %r21,%r19,%r21
stw %r21,4(%r26)
.label L(6)
ldw 8(%r25),%r20
ldw 8(%r24),%r31
subb %r20,%r31,%r20
stw %r20,8(%r26)
.label L(5)
ldw 12(%r25),%r21
ldw 12(%r24),%r19
subb %r21,%r19,%r21
stw %r21,12(%r26)
.label L(4)
ldw 16(%r25),%r20
ldw 16(%r24),%r31
subb %r20,%r31,%r20
stw %r20,16(%r26)
.label L(3)
ldw 20(%r25),%r21
ldw 20(%r24),%r19
subb %r21,%r19,%r21
stw %r21,20(%r26)
.label L(2)
ldw 24(%r25),%r20
ldw 24(%r24),%r31
subb %r20,%r31,%r20
stw %r20,24(%r26)
.label L(1)
ldw 28(%r25),%r21
ldo 32(%r25),%r25
ldw 28(%r24),%r19
subb %r21,%r19,%r21
stw %r21,28(%r26)
ldo 32(%r24),%r24
addib,> -8,%r23,L(loop)
ldo 32(%r26),%r26
addc %r0,%r0,%r28
bv (%r2)
subi 1,%r28,%r28
EPILOGUE()