Added 64-bit hashing

This commit is contained in:
jgarcia 2008-01-05 21:02:38 +00:00
parent b23b09e67a
commit 26d819ad80
3 changed files with 139 additions and 57 deletions

View file

@ -10,6 +10,8 @@ ECL 0.9k:
- RENAME-FILE now accepts a keyword argument :IF-EXISTS, which defines the
behavior when a file with the new name already exists (Geo Carncross).
- Improved hashing on 64-bit machines.
;;; Local Variables: ***
;;; mode:text ***
;;; fill-column:79 ***

View file

@ -28,7 +28,7 @@
* SBCL'S old mashing function. Leads to many collisions.
*/
#if FIXNUM_BITS > 32
#if 0
#define mash(h,n) ((((h) << 5) | ((h) >> (FIXNUM_BITS - 5))) ^ (n))
#define hash_word(h,x) mash(h,(cl_index)x)
@ -42,70 +42,150 @@ hash_string(cl_hashkey h, const unsigned char *buf, cl_index len)
return h;
}
#else
#endif
/*
* SBCL's newest algorithm. Leads to few collisions, is fast, but
* limited to 32 bits.
* SBCL's newest algorithm. Leads to few collisions, and it is faster.
*/
#define mix(a,b,c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<<8); \
c -= a; c -= b; c ^= (b>>13); \
a -= b; a -= c; a ^= (c>>12); \
b -= c; b -= a; b ^= (a<<16); \
c -= a; c -= b; c ^= (b>>5); \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
}
#if FIXNUM_BITS > 32
/*
* 64 bit version
*/
#define GOLDEN_RATIO 0x9e3779b97f4a7c13L
#define mix(a,b,c) \
{ \
a=a-b; a=a-c; a=a^(c>>43); \
b=b-c; b=b-a; b=b^(a<<9); \
c=c-a; c=c-b; c=c^(b>>8); \
a=a-b; a=a-c; a=a^(c>>38); \
b=b-c; b=b-a; b=b^(a<<23); \
c=c-a; c=c-b; c=c^(b>>5); \
a=a-b; a=a-c; a=a^(c>>35); \
b=b-c; b=b-a; b=b^(a<<49); \
c=c-a; c=c-b; c=c^(b>>11); \
a=a-b; a=a-c; a=a^(c>>12); \
b=b-c; b=b-a; b=b^(a<<18); \
c=c-a; c=c-b; c=c^(b>>22); \
}
static uint32_t
hash_string(uint32_t initval, const unsigned char *k, cl_index len)
#define extract_word(k) \
(k[0]+((cl_index)k[1]<<8)+((cl_index)k[2]<<16)+((cl_index)k[3]<<24)+ \
((cl_index)k[4]<<32)+((cl_index)k[5]<<40)+((cl_index)k[6]<<48)+ \
((cl_index)k[7]<<52))
static cl_index
hash_string(cl_index initval, const unsigned char *k, cl_index len)
{
uint32_t a = 0, b = 0, c = initval;
for (; len > 12; ) {
a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24));
b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24));
c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24));
mix(a,b,c);
k += 12; len -= 12;
}
register cl_index a = GOLDEN_RATIO, b = GOLDEN_RATIO, c = initval;
for (; len > 24; ) {
a += extract_word(k); k+=8;
b += extract_word(k); k+=8;
c += extract_word(k); k+=8;
mix(a,b,c);
len -= 24;
}
/*------------------------------------- handle the last 11 bytes */
c += len;
switch(len) {
/* all the case statements fall through */
case 11: c+=((uint32_t)k[10]<<24);
case 10: c+=((uint32_t)k[9]<<16);
case 9 : c+=((uint32_t)k[8]<<8);
/* the first byte of c is reserved for the length */
case 8 : b+=((uint32_t)k[7]<<24);
case 7 : b+=((uint32_t)k[6]<<16);
case 6 : b+=((uint32_t)k[5]<<8);
case 5 : b+=k[4];
case 4 : a+=((uint32_t)k[3]<<24);
case 3 : a+=((uint32_t)k[2]<<16);
case 2 : a+=((uint32_t)k[1]<<8);
case 1 : a+=k[0];
/* case 0: nothing left to add */
}
mix(a,b,c);
/*-------------------------------------------- report the result */
return c;
}
static uint32_t hash_word(uint32_t c, uint32_t a)
{
uint32_t b = 0;
mix(a, b, c);
/*------------------------------------- handle the last 11 bytes */
c += len;
switch(len) {
/* all the case statements fall through */
case 23: c+=((cl_index)k[22]<<52);
case 22: c+=((cl_index)k[21]<<48);
case 21: c+=((cl_index)k[20]<<40);
case 20: c+=((cl_index)k[19]<<32);
case 19: c+=((cl_index)k[18]<<24);
case 18: c+=((cl_index)k[17]<<16);
case 17: c+=((cl_index)k[16]<<8);
/* the first byte of c is reserved for the length */
case 16: b+=((cl_index)k[15]<<52);
case 15: b+=((cl_index)k[14]<<48);
case 14: b+=((cl_index)k[13]<<40);
case 13: b+=((cl_index)k[12]<<32);
case 12: b+=((cl_index)k[11]<<24);
case 11: b+=((cl_index)k[10]<<16);
case 10: b+=((cl_index)k[9]<<8);
case 9 : b+=k[8];
case 8 : a+=((cl_index)k[7]<<52);
case 7 : a+=((cl_index)k[6]<<48);
case 6 : a+=((cl_index)k[5]<<40);
case 5 : a+=((cl_index)k[4]<<32);
case 4 : a+=((cl_index)k[3]<<24);
case 3 : a+=((cl_index)k[2]<<16);
case 2 : a+=((cl_index)k[1]<<8);
case 1 : a+=k[0];
/* case 0: nothing left to add */
}
mix(a,b,c);
/*-------------------------------------------- report the result */
return c;
}
#else
/*
* 32 bit version
*/
#define GOLDEN_RATIO 0x9e3779b9L
#define mix(a,b,c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<<8); \
c -= a; c -= b; c ^= (b>>13); \
a -= b; a -= c; a ^= (c>>12); \
b -= c; b -= a; b ^= (a<<16); \
c -= a; c -= b; c ^= (b>>5); \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
}
#define extract_word(k) \
(k[0]+((cl_index)k[1]<<8)+((cl_index)k[2]<<16)+((cl_index)k[3]<<24))
static cl_index
hash_string(cl_index initval, const unsigned char *k, cl_index len)
{
register cl_index a = GOLDEN_RATIO, b = GOLDEN_RATIO, c = initval;
for (; len > 12; ) {
a += extract_word(k); k += 4;
b += extract_word(k); k += 4;
c += extract_word(k); k += 4;
mix(a,b,c);
len -= 12;
}
/*------------------------------------- handle the last 11 bytes */
c += len;
switch(len) {
/* all the case statements fall through */
case 11: c+=((cl_index)k[10]<<24);
case 10: c+=((cl_index)k[9]<<16);
case 9 : c+=((cl_index)k[8]<<8);
/* the first byte of c is reserved for the length */
case 8 : b+=((cl_index)k[7]<<24);
case 7 : b+=((cl_index)k[6]<<16);
case 6 : b+=((cl_index)k[5]<<8);
case 5 : b+=k[4];
case 4 : a+=((cl_index)k[3]<<24);
case 3 : a+=((cl_index)k[2]<<16);
case 2 : a+=((cl_index)k[1]<<8);
case 1 : a+=k[0];
/* case 0: nothing left to add */
}
mix(a,b,c);
/*-------------------------------------------- report the result */
return c;
}
#endif
static cl_index hash_word(cl_index c, cl_index a)
{
cl_index b = GOLDEN_RATIO;
mix(a, b, c);
return c;
}
static void corrupted_hash(cl_object hashtable) /*__attribute__((noreturn))*/;
static void
@ -124,7 +204,7 @@ _hash_eql(cl_hashkey h, cl_object x)
return hash_string(h, (unsigned char*)x->big.big_limbs,
labs(x->big.big_size) * sizeof(mp_limb_t));
#else /* WITH_GMP */
return hash_word(h, (uint32_t)(x->big.big_num));
return hash_word(h, (cl_index)(x->big.big_num));
#endif /* WITH_GMP */
case t_ratio:
h = _hash_eql(h, x->ratio.num);
@ -171,7 +251,7 @@ _hash_equal(int depth, cl_hashkey h, cl_object x)
case t_base_string: {
cl_index i;
for (i = 0; i < x->base_string.fillp; i++) {
uint32_t w = x->base_string.self[i];
cl_index w = x->base_string.self[i];
h = hash_word(h, w);
}
break;
@ -179,7 +259,7 @@ _hash_equal(int depth, cl_hashkey h, cl_object x)
case t_string: {
cl_index i;
for (i = 0; i < x->string.fillp; i++) {
uint32_t w = CHAR_CODE(x->string.self[i]);
cl_index w = CHAR_CODE(x->string.self[i]);
h = hash_word(h, w);
}
break;

View file

@ -24,7 +24,7 @@ Returns, as a string, the location of the machine on which ECL runs."
(defun lisp-implementation-version ()
"Args:()
Returns the version of your ECL as a string."
"@PACKAGE_VERSION@ (CVS 2008-01-02 23:06)")
"@PACKAGE_VERSION@ (CVS 2008-01-05 22:02)")
(defun machine-type ()
"Args: ()