diff --git a/src/CHANGELOG b/src/CHANGELOG index 69c614c9c..baf2aed39 100644 --- a/src/CHANGELOG +++ b/src/CHANGELOG @@ -10,6 +10,8 @@ ECL 0.9k: - RENAME-FILE now accepts a keyword argument :IF-EXISTS, which defines the behavior when a file with the new name already exists (Geo Carncross). + - Improved hashing on 64-bit machines. + ;;; Local Variables: *** ;;; mode:text *** ;;; fill-column:79 *** diff --git a/src/c/hash.d b/src/c/hash.d index cd5b08316..62b3a9809 100644 --- a/src/c/hash.d +++ b/src/c/hash.d @@ -28,7 +28,7 @@ * SBCL'S old mashing function. Leads to many collisions. */ -#if FIXNUM_BITS > 32 +#if 0 #define mash(h,n) ((((h) << 5) | ((h) >> (FIXNUM_BITS - 5))) ^ (n)) #define hash_word(h,x) mash(h,(cl_index)x) @@ -42,70 +42,150 @@ hash_string(cl_hashkey h, const unsigned char *buf, cl_index len) return h; } -#else +#endif /* - * SBCL's newest algorithm. Leads to few collisions, is fast, but - * limited to 32 bits. + * SBCL's newest algorithm. Leads to few collisions, and it is faster. */ -#define mix(a,b,c) \ -{ \ - a -= b; a -= c; a ^= (c>>13); \ - b -= c; b -= a; b ^= (a<<8); \ - c -= a; c -= b; c ^= (b>>13); \ - a -= b; a -= c; a ^= (c>>12); \ - b -= c; b -= a; b ^= (a<<16); \ - c -= a; c -= b; c ^= (b>>5); \ - a -= b; a -= c; a ^= (c>>3); \ - b -= c; b -= a; b ^= (a<<10); \ - c -= a; c -= b; c ^= (b>>15); \ -} +#if FIXNUM_BITS > 32 +/* + * 64 bit version + */ +#define GOLDEN_RATIO 0x9e3779b97f4a7c13L +#define mix(a,b,c) \ + { \ + a=a-b; a=a-c; a=a^(c>>43); \ + b=b-c; b=b-a; b=b^(a<<9); \ + c=c-a; c=c-b; c=c^(b>>8); \ + a=a-b; a=a-c; a=a^(c>>38); \ + b=b-c; b=b-a; b=b^(a<<23); \ + c=c-a; c=c-b; c=c^(b>>5); \ + a=a-b; a=a-c; a=a^(c>>35); \ + b=b-c; b=b-a; b=b^(a<<49); \ + c=c-a; c=c-b; c=c^(b>>11); \ + a=a-b; a=a-c; a=a^(c>>12); \ + b=b-c; b=b-a; b=b^(a<<18); \ + c=c-a; c=c-b; c=c^(b>>22); \ + } -static uint32_t -hash_string(uint32_t initval, const unsigned char *k, cl_index len) +#define extract_word(k) \ + (k[0]+((cl_index)k[1]<<8)+((cl_index)k[2]<<16)+((cl_index)k[3]<<24)+ \ + ((cl_index)k[4]<<32)+((cl_index)k[5]<<40)+((cl_index)k[6]<<48)+ \ + ((cl_index)k[7]<<52)) + +static cl_index +hash_string(cl_index initval, const unsigned char *k, cl_index len) { - uint32_t a = 0, b = 0, c = initval; - for (; len > 12; ) { - a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24)); - b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24)); - c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24)); - mix(a,b,c); - k += 12; len -= 12; - } + register cl_index a = GOLDEN_RATIO, b = GOLDEN_RATIO, c = initval; + for (; len > 24; ) { + a += extract_word(k); k+=8; + b += extract_word(k); k+=8; + c += extract_word(k); k+=8; + mix(a,b,c); + len -= 24; + } - /*------------------------------------- handle the last 11 bytes */ - c += len; - switch(len) { - /* all the case statements fall through */ - case 11: c+=((uint32_t)k[10]<<24); - case 10: c+=((uint32_t)k[9]<<16); - case 9 : c+=((uint32_t)k[8]<<8); - /* the first byte of c is reserved for the length */ - case 8 : b+=((uint32_t)k[7]<<24); - case 7 : b+=((uint32_t)k[6]<<16); - case 6 : b+=((uint32_t)k[5]<<8); - case 5 : b+=k[4]; - case 4 : a+=((uint32_t)k[3]<<24); - case 3 : a+=((uint32_t)k[2]<<16); - case 2 : a+=((uint32_t)k[1]<<8); - case 1 : a+=k[0]; - /* case 0: nothing left to add */ - } - mix(a,b,c); - /*-------------------------------------------- report the result */ - return c; -} - -static uint32_t hash_word(uint32_t c, uint32_t a) -{ - uint32_t b = 0; - mix(a, b, c); + /*------------------------------------- handle the last 11 bytes */ + c += len; + switch(len) { + /* all the case statements fall through */ + case 23: c+=((cl_index)k[22]<<52); + case 22: c+=((cl_index)k[21]<<48); + case 21: c+=((cl_index)k[20]<<40); + case 20: c+=((cl_index)k[19]<<32); + case 19: c+=((cl_index)k[18]<<24); + case 18: c+=((cl_index)k[17]<<16); + case 17: c+=((cl_index)k[16]<<8); + /* the first byte of c is reserved for the length */ + case 16: b+=((cl_index)k[15]<<52); + case 15: b+=((cl_index)k[14]<<48); + case 14: b+=((cl_index)k[13]<<40); + case 13: b+=((cl_index)k[12]<<32); + case 12: b+=((cl_index)k[11]<<24); + case 11: b+=((cl_index)k[10]<<16); + case 10: b+=((cl_index)k[9]<<8); + case 9 : b+=k[8]; + case 8 : a+=((cl_index)k[7]<<52); + case 7 : a+=((cl_index)k[6]<<48); + case 6 : a+=((cl_index)k[5]<<40); + case 5 : a+=((cl_index)k[4]<<32); + case 4 : a+=((cl_index)k[3]<<24); + case 3 : a+=((cl_index)k[2]<<16); + case 2 : a+=((cl_index)k[1]<<8); + case 1 : a+=k[0]; + /* case 0: nothing left to add */ + } + mix(a,b,c); + /*-------------------------------------------- report the result */ return c; } +#else +/* + * 32 bit version + */ + +#define GOLDEN_RATIO 0x9e3779b9L +#define mix(a,b,c) \ + { \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ + } +#define extract_word(k) \ + (k[0]+((cl_index)k[1]<<8)+((cl_index)k[2]<<16)+((cl_index)k[3]<<24)) + + +static cl_index +hash_string(cl_index initval, const unsigned char *k, cl_index len) +{ + register cl_index a = GOLDEN_RATIO, b = GOLDEN_RATIO, c = initval; + for (; len > 12; ) { + a += extract_word(k); k += 4; + b += extract_word(k); k += 4; + c += extract_word(k); k += 4; + mix(a,b,c); + len -= 12; + } + + /*------------------------------------- handle the last 11 bytes */ + c += len; + switch(len) { + /* all the case statements fall through */ + case 11: c+=((cl_index)k[10]<<24); + case 10: c+=((cl_index)k[9]<<16); + case 9 : c+=((cl_index)k[8]<<8); + /* the first byte of c is reserved for the length */ + case 8 : b+=((cl_index)k[7]<<24); + case 7 : b+=((cl_index)k[6]<<16); + case 6 : b+=((cl_index)k[5]<<8); + case 5 : b+=k[4]; + case 4 : a+=((cl_index)k[3]<<24); + case 3 : a+=((cl_index)k[2]<<16); + case 2 : a+=((cl_index)k[1]<<8); + case 1 : a+=k[0]; + /* case 0: nothing left to add */ + } + mix(a,b,c); + /*-------------------------------------------- report the result */ + return c; +} #endif +static cl_index hash_word(cl_index c, cl_index a) +{ + cl_index b = GOLDEN_RATIO; + mix(a, b, c); + return c; +} + static void corrupted_hash(cl_object hashtable) /*__attribute__((noreturn))*/; static void @@ -124,7 +204,7 @@ _hash_eql(cl_hashkey h, cl_object x) return hash_string(h, (unsigned char*)x->big.big_limbs, labs(x->big.big_size) * sizeof(mp_limb_t)); #else /* WITH_GMP */ - return hash_word(h, (uint32_t)(x->big.big_num)); + return hash_word(h, (cl_index)(x->big.big_num)); #endif /* WITH_GMP */ case t_ratio: h = _hash_eql(h, x->ratio.num); @@ -171,7 +251,7 @@ _hash_equal(int depth, cl_hashkey h, cl_object x) case t_base_string: { cl_index i; for (i = 0; i < x->base_string.fillp; i++) { - uint32_t w = x->base_string.self[i]; + cl_index w = x->base_string.self[i]; h = hash_word(h, w); } break; @@ -179,7 +259,7 @@ _hash_equal(int depth, cl_hashkey h, cl_object x) case t_string: { cl_index i; for (i = 0; i < x->string.fillp; i++) { - uint32_t w = CHAR_CODE(x->string.self[i]); + cl_index w = CHAR_CODE(x->string.self[i]); h = hash_word(h, w); } break; diff --git a/src/lsp/config.lsp.in b/src/lsp/config.lsp.in index 6ecf153df..85e079c0d 100644 --- a/src/lsp/config.lsp.in +++ b/src/lsp/config.lsp.in @@ -24,7 +24,7 @@ Returns, as a string, the location of the machine on which ECL runs." (defun lisp-implementation-version () "Args:() Returns the version of your ECL as a string." - "@PACKAGE_VERSION@ (CVS 2008-01-02 23:06)") + "@PACKAGE_VERSION@ (CVS 2008-01-05 22:02)") (defun machine-type () "Args: ()