diff --git a/src/c/hash.d b/src/c/hash.d index 9182377fb..443d5b53e 100644 --- a/src/c/hash.d +++ b/src/c/hash.d @@ -268,6 +268,15 @@ _hash_equalp(int depth, cl_hashkey h, cl_object x) } } +static cl_hashkey _hash_generic(cl_object ht, cl_object key) { + cl_object hash_fun = ht->hash.generic_hash; + cl_object h_object = _ecl_funcall2(hash_fun, key); + if (!ECL_FIXNUMP(h_object) || ecl_fixnum_minusp(h_object)) { + FEwrong_type_argument(@'fixnum', h_object); + } + return ecl_fixnum(h_object); +} + #define HASH_TABLE_LOOP(hkey,hvalue,h,HASH_TABLE_LOOP_TEST) { \ cl_index hsize = hashtable->hash.size; \ cl_index i = h % hsize, j = hsize, k; \ @@ -293,9 +302,6 @@ _hash_equalp(int depth, cl_hashkey h, cl_object x) return hashtable->hash.data + j; \ } -#if 0 -#define HASH_TABLE_SET(h,loop,compute_key,store_key) -#else #define HASH_TABLE_SET(h,loop,compute_key,store_key) { \ cl_hashkey h = compute_key; \ struct ecl_hashtable_entry *e; \ @@ -313,17 +319,12 @@ AGAIN: \ e->value = value; \ return hashtable; \ } -#endif /* * EQ HASHTABLES */ -#if 0 -#define _hash_eq(k) ((cl_hashkey)(k) ^ ((cl_hashkey)(k) >> 16)) -#else #define _hash_eq(k) ((cl_hashkey)(k) >> 2) -#endif static struct ecl_hashtable_entry * _ecl_hash_loop_eq(cl_hashkey h, cl_object key, cl_object hashtable) @@ -517,6 +518,51 @@ _ecl_remhash_pack(cl_object key, cl_object hashtable) } } +/* + * Generic HASHTABLES + */ + +static bool +_ecl_generic_hash_test(cl_object fun, cl_object key, cl_object hkey) { + return (_ecl_funcall3(fun, key, hkey) != ECL_NIL); +} + +static struct ecl_hashtable_entry * +_ecl_hash_loop_generic(cl_hashkey h, cl_object key, cl_object hashtable) +{ + cl_object test_fun = hashtable->hash.generic_test; + HASH_TABLE_LOOP(hkey, hvalue, h, _ecl_generic_hash_test(test_fun, key, hkey)); +} + +static cl_object +_ecl_gethash_generic(cl_object key, cl_object hashtable, cl_object def) +{ + cl_hashkey h = _hash_generic(hashtable, key); + struct ecl_hashtable_entry *e = _ecl_hash_loop_generic(h, key, hashtable); + return (e->key == OBJNULL)? def : e->value; +} + +static cl_object +_ecl_sethash_generic(cl_object key, cl_object hashtable, cl_object value) +{ + HASH_TABLE_SET(h, _ecl_hash_loop_generic, _hash_generic(hashtable, key), key); +} + +static bool +_ecl_remhash_generic(cl_object key, cl_object hashtable) +{ + cl_hashkey h = _hash_generic(hashtable, key); + struct ecl_hashtable_entry *e = _ecl_hash_loop_generic(h, key, hashtable); + if (e->key == OBJNULL) { + return 0; + } else { + e->key = OBJNULL; + e->value = ECL_NIL; + hashtable->hash.entries--; + return 1; + } +} + /* * WEAK HASH TABLES */ @@ -528,11 +574,13 @@ _ecl_remhash_pack(cl_object key, cl_object hashtable) static cl_hashkey _ecl_hash_key(cl_object h, cl_object o) { switch (h->hash.test) { - case ecl_htt_eq: return _hash_eq(o); - case ecl_htt_eql: return _hash_eql(0, o); - case ecl_htt_equal: return _hash_equal(3, 0, o); - case ecl_htt_equalp: - default: return _hash_equalp(3, 0, o); + case ecl_htt_eq: return _hash_eq(o); + case ecl_htt_eql: return _hash_eql(0, o); + case ecl_htt_equal: return _hash_equal(3, 0, o); + case ecl_htt_equalp: return _hash_equalp(3, 0, o); + case ecl_htt_pack: return _hash_equal(3, 0, o); + case ecl_htt_generic: + return _hash_generic(h, o); } } @@ -631,14 +679,13 @@ _ecl_weak_hash_loop(cl_hashkey h, cl_object key, cl_object hashtable, continue; } switch (hashtable->hash.test) { - case ecl_htt_eq: - if (e.key == key) return p; - case ecl_htt_eql: - if (ecl_eql(e.key, key)) return p; - case ecl_htt_equal: - if (ecl_equal(e.key, key)) return p; - case ecl_htt_equalp: - if (ecl_equalp(e.key, key)) return p; + case ecl_htt_eq: if (e.key == key) return p; + case ecl_htt_eql: if (ecl_eql(e.key, key)) return p; + case ecl_htt_equal: if (ecl_equal(e.key, key)) return p; + case ecl_htt_equalp: if (ecl_equalp(e.key, key)) return p; + case ecl_htt_generic: + if (_ecl_generic_hash_test(hashtable->hash.generic_test, e.key, key)) + return p; } } return hashtable->hash.data + j; @@ -719,8 +766,11 @@ _ecl_sethash_sync(cl_object key, cl_object hashtable, cl_object value) cl_object output = ECL_NIL; cl_object sync_lock = hashtable->hash.sync_lock; mp_get_rwlock_write_wait(sync_lock); - output = hashtable->hash.set_unsafe(key, hashtable, value); - mp_giveup_rwlock_write(sync_lock); + ECL_UNWIND_PROTECT_BEGIN(ecl_process_env()) { + output = hashtable->hash.set_unsafe(key, hashtable, value); + } ECL_UNWIND_PROTECT_THREAD_SAFE_EXIT { + mp_giveup_rwlock_write(sync_lock); + } ECL_UNWIND_PROTECT_THREAD_SAFE_END; return output; } @@ -730,8 +780,11 @@ _ecl_gethash_sync(cl_object key, cl_object hashtable, cl_object def) cl_object output = ECL_NIL; cl_object sync_lock = hashtable->hash.sync_lock; mp_get_rwlock_read_wait(sync_lock); - output = hashtable->hash.get_unsafe(key, hashtable, def); - mp_giveup_rwlock_read(sync_lock); + ECL_UNWIND_PROTECT_BEGIN(ecl_process_env()) { + output = hashtable->hash.get_unsafe(key, hashtable, def); + } ECL_UNWIND_PROTECT_THREAD_SAFE_EXIT { + mp_giveup_rwlock_read(sync_lock); + } ECL_UNWIND_PROTECT_THREAD_SAFE_END; return output; } @@ -741,8 +794,11 @@ _ecl_remhash_sync(cl_object key, cl_object hashtable) bool output = 0; cl_object sync_lock = hashtable->hash.sync_lock; mp_get_rwlock_write_wait(sync_lock); - output = hashtable->hash.rem_unsafe(key, hashtable); - mp_giveup_rwlock_write(sync_lock); + ECL_UNWIND_PROTECT_BEGIN(ecl_process_env()) { + output = hashtable->hash.rem_unsafe(key, hashtable); + } ECL_UNWIND_PROTECT_THREAD_SAFE_EXIT { + mp_giveup_rwlock_write(sync_lock); + } ECL_UNWIND_PROTECT_THREAD_SAFE_END; return output; } #endif @@ -836,6 +892,7 @@ ecl_extend_hashtable(cl_object hashtable) } @(defun make_hash_table (&key (test @'eql') + (hash_function ECL_NIL) (weakness ECL_NIL) (synchronized ECL_NIL) (size ecl_make_fixnum(1024)) @@ -843,6 +900,17 @@ ecl_extend_hashtable(cl_object hashtable) (rehash_threshold cl_core.rehash_threshold)) @ { cl_object hash = cl__make_hash_table(test, size, rehash_size, rehash_threshold); + if (hash->hash.test == ecl_htt_generic) { + /* Normally we would make hash_function an argument to + cl__make_hash_table and put this test in there and void + unnecessary object allocation, but we do not want to + compromise the API. -- jd 2019-05-23 */ + if (hash_function == ECL_NIL) { + FEerror("~S is an illegal hash-table test function.", 1, test); + } + hash_function = si_coerce_to_function(hash_function); + hash->hash.generic_hash = hash_function; + } #ifdef ECL_WEAK_HASH if (!Null(weakness)) { if (weakness == @':key') { @@ -910,6 +978,7 @@ cl__make_hash_table(cl_object test, cl_object size, cl_object rehash_size, int htt; cl_index hsize; cl_object h; + cl_object hash_test = ECL_NIL, hash_func = ECL_NIL; cl_object (*get)(cl_object, cl_object, cl_object); cl_object (*set)(cl_object, cl_object, cl_object); bool (*rem)(cl_object, cl_object); @@ -942,8 +1011,11 @@ cl__make_hash_table(cl_object test, cl_object size, cl_object rehash_size, set = _ecl_sethash_pack; rem = _ecl_remhash_pack; } else { - FEerror("~S is an illegal hash-table test function.", - 1, test); + htt = ecl_htt_generic; + get = _ecl_gethash_generic; + set = _ecl_sethash_generic; + rem = _ecl_remhash_generic; + hash_test = si_coerce_to_function(test); } if (ecl_unlikely(!ECL_FIXNUMP(size) || ecl_fixnum_minusp(size) || @@ -989,6 +1061,8 @@ cl__make_hash_table(cl_object test, cl_object size, cl_object rehash_size, h = ecl_alloc_object(t_hashtable); h->hash.test = htt; h->hash.weak = ecl_htt_not_weak; + h->hash.generic_test = hash_test; + h->hash.generic_hash = hash_func; h->hash.get = get; h->hash.set = set; h->hash.rem = rem; @@ -1035,6 +1109,11 @@ ecl_reconstruct_serialized_hashtable(cl_object h) { h->hash.set = _ecl_sethash_pack; h->hash.rem = _ecl_remhash_pack; break; + case ecl_htt_generic: + h->hash.get = _ecl_gethash_generic; + h->hash.set = _ecl_sethash_generic; + h->hash.rem = _ecl_remhash_generic; + break; } if (h->hash.weak != ecl_htt_not_weak) { h->hash.get = _ecl_gethash_weak; @@ -1139,12 +1218,13 @@ cl_hash_table_test(cl_object ht) cl_object output; assert_type_hash_table(@[hash-table-test], 1, ht); switch(ht->hash.test) { - case ecl_htt_eq: output = @'eq'; break; - case ecl_htt_eql: output = @'eql'; break; - case ecl_htt_equal: output = @'equal'; break; - case ecl_htt_equalp: output = @'equalp'; break; - case ecl_htt_pack: - default: output = @'equal'; + case ecl_htt_eq: output = @'eq'; break; + case ecl_htt_eql: output = @'eql'; break; + case ecl_htt_equal: output = @'equal'; break; + case ecl_htt_equalp: output = @'equalp'; break; + case ecl_htt_pack: output = @'equal'; break; + case ecl_htt_generic: output = ht->hash.generic_test; + default: FEerror("hash-table-test: unknown test.", 0); } @(return output); } @@ -1322,6 +1402,7 @@ si_copy_hash_table(cl_object orig) cl_hash_table_size(orig), cl_hash_table_rehash_size(orig), cl_hash_table_rehash_threshold(orig)); + hash->hash.generic_hash = orig->hash.generic_hash, memcpy(hash->hash.data, orig->hash.data, orig->hash.size * sizeof(*orig->hash.data)); hash->hash.entries = orig->hash.entries; diff --git a/src/c/printer/write_ugly.d b/src/c/printer/write_ugly.d index 88912fc65..ae07a3887 100644 --- a/src/c/printer/write_ugly.d +++ b/src/c/printer/write_ugly.d @@ -176,8 +176,11 @@ write_hashtable(cl_object x, cl_object stream) { if (ecl_print_readably() && !Null(ecl_symbol_value(@'*read-eval*'))) { cl_object make = - cl_list(9, @'make-hash-table', + cl_list(15, @'make-hash-table', @':size', cl_hash_table_size(x), + @':synchronized', si_hash_table_synchronized_p(x), + @':weakness', si_hash_table_weakness(x), + @':hash-function', x->hash.generic_hash, @':rehash-size', cl_hash_table_rehash_size(x), @':rehash-threshold', cl_hash_table_rehash_threshold(x), @':test', cl_list(2, @'quote', cl_hash_table_test(x))); diff --git a/src/c/symbols_list.h b/src/c/symbols_list.h index f45f987dd..9b8c3e79c 100755 --- a/src/c/symbols_list.h +++ b/src/c/symbols_list.h @@ -1368,6 +1368,7 @@ cl_symbols[] = { {KEY_ "FORMAT-CONTROL", KEYWORD, NULL, -1, OBJNULL}, {KEY_ "FUNCTION", KEYWORD, NULL, -1, OBJNULL}, {KEY_ "GENSYM", KEYWORD, NULL, -1, OBJNULL}, +{KEY_ "HASH-FUNCTION", KEYWORD, NULL, -1, OBJNULL}, {KEY_ "HOST", KEYWORD, NULL, -1, OBJNULL}, {KEY_ "IF-DOES-NOT-EXIST", KEYWORD, NULL, -1, OBJNULL}, {KEY_ "IF-ERROR-EXISTS", KEYWORD, NULL, -1, OBJNULL}, diff --git a/src/c/symbols_list2.h b/src/c/symbols_list2.h index 90e62b3c5..a70a355b1 100644 --- a/src/c/symbols_list2.h +++ b/src/c/symbols_list2.h @@ -1368,6 +1368,7 @@ cl_symbols[] = { {KEY_ "FORMAT-CONTROL",NULL}, {KEY_ "FUNCTION",NULL}, {KEY_ "GENSYM",NULL}, +{KEY_ "HASH-FUNCTION",NULL}, {KEY_ "HOST",NULL}, {KEY_ "IF-DOES-NOT-EXIST",NULL}, {KEY_ "IF-ERROR-EXISTS",NULL}, diff --git a/src/doc/help.lsp b/src/doc/help.lsp index 464eb5da8..89e16185e 100644 --- a/src/doc/help.lsp +++ b/src/doc/help.lsp @@ -1961,17 +1961,36 @@ An echo stream is notated as # where N is a number that identifies the stream.") -(docfun make-hash-table function (&key (test 'eql) (size 1024) (rehash-size 1.5) (rehash-threshold 0.7)) " +(docfun make-hash-table function (&key (test 'eql) (size 1024) (rehash-size 1.5) (rehash-threshold 0.7) (hash-function nil) (synchronized nil) (weakness nil)) " Creates and returns a hash-table. -TEST specifies which predicate should be used to access hash-table entries. -It must be EQ, EQL, or EQUAL. SIZE specifies the number of entries in the -hash-table. REHASH-SIZE, if an integer, specifies how many entries should be -added when the hash-table becomes 'almost full'. REHASH-SIZE, if a float, -specifies the ratio of the new size and the old size. REHASH-THRESHOLD -specifies when to expand the hash-table. If an integer, the hash-table is -expanded when REHASH-THRESHOLD / REHASH-SIZE entries have been used. If a -float, the hash-table is expanded when REHASH-THRESHOLD times the whole -entries have been used.") + +TEST specifies which predicate should be used to access hash-table +entries. It must be EQ, EQL, EQUAL, EQUALP or a function accepting +two arguments. If it is a function then HASH-FUNCTION must be +supplied. + +HASH-FUNCTION is used alongside with a custom TEST predicate. It +accepts one argument and must return a positive fixnum being the +object's hash. + +SIZE specifies the number of entries in the hash-table. + +REHASH-SIZE, if an integer, specifies how many entries should be added +when the hash-table becomes 'almost full'. REHASH-SIZE, if a float, +specifies the ratio of the new size and the old size. + +REHASH-THRESHOLD specifies when to expand the hash-table. If an +integer, the hash-table is expanded when REHASH-THRESHOLD / + +REHASH-SIZE entries have been used. If a float, the hash-table is +expanded when REHASH-THRESHOLD times the whole entries have been used. + +SYNCHRONIZE if T then gethash, (setf gethash) and remhash operations +are protected by a lock - in this case hash tables may be used from +different threads without explicit synchronization. + +WEAKNESS is a GC extension and may be one of NIL, :KEY, :VALUE, +:KEY-AND-VALUE or :KEY-OR-VALUE. ") (docfun make-list function (length &key (initial-element nil)) " Creates and returns a list of the specified LENGTH, whose elements are all the diff --git a/src/doc/manual/standards/hashtables.txi b/src/doc/manual/standards/hashtables.txi index 78549f026..092f98345 100644 --- a/src/doc/manual/standards/hashtables.txi +++ b/src/doc/manual/standards/hashtables.txi @@ -63,6 +63,16 @@ table may have some content already, but conflicting keys will be overwritten. @end deffn +@subsubsection Custom equivalence predicate +@cindex Hash table generic test + +@code{make-hash-table} may accept arbitrary @code{:test} keyword for +the equivalence predicate. If it is not one of the standard predicates +(@code{:eq}, @code{:eql}, @code{:equal}, @code{:equalp}) a keyword +argument @code{:hashing-function} must be a function accepting one +argument and returning a positive fixnum. Otherwise the argument is +ignored. + @subsubsection Example @exindex Hash table extensions example @lisp diff --git a/src/h/object.h b/src/h/object.h index 48094dc40..0377d1b43 100644 --- a/src/h/object.h +++ b/src/h/object.h @@ -378,7 +378,8 @@ enum ecl_httest { /* hash table key test function */ ecl_htt_eql, /* eql */ ecl_htt_equal, /* equal */ ecl_htt_equalp, /* equalp */ - ecl_htt_pack /* symbol hash */ + ecl_htt_pack, /* symbol hash */ + ecl_htt_generic /* generic user-supplied test */ }; enum ecl_htweak { @@ -398,6 +399,8 @@ struct ecl_hashtable { /* hash table header */ _ECL_HDR2(test,weak); struct ecl_hashtable_entry *data; /* pointer to the hash table */ cl_object sync_lock; /* synchronization lock */ + cl_object generic_test; /* generic test function */ + cl_object generic_hash; /* generic hashing function */ cl_index entries; /* number of entries */ cl_index size; /* hash table size */ cl_index limit; /* hash table threshold (integer value) */ diff --git a/src/tests/normal-tests/hash-tables.lsp b/src/tests/normal-tests/hash-tables.lsp index d7d0d1d3e..c9f2e5355 100644 --- a/src/tests/normal-tests/hash-tables.lsp +++ b/src/tests/normal-tests/hash-tables.lsp @@ -76,3 +76,37 @@ (is (= 3 (gethash :foo ht))) (is-true (remhash :bar ht)) (is (= 1 (hash-table-count ht))))) + + +;;; generic test and hash functions + +;;; In this test we provide an equality predicate which distinguishes +;;; only two types of numbers: odd and even. HT is synchronized +;;; because we want also to check, if lock is not hogged by errors +;;; inside our function (we pass string for that purpose). +(test hash-tables.custom + (flet ((not-so-fancy-equals (x y) + (if (zerop x) + (= x y) + (eql (evenp x) (evenp y)))) + (not-so-fancy-hash (x) + (cond ((zerop x) 0) + ((evenp x) 1) + (T 2)))) + (signals error (make-hash-table :test #'not-so-fancy-equals)) + (let ((ht (make-hash-table :test #'not-so-fancy-equals + :hash-function #'not-so-fancy-hash + :synchronized t))) + (finishes + (setf (gethash 13 ht) 42 + (gethash 12 ht) 33 + (gethash 10 ht) 55)) + (is (= (gethash 12 ht) 55)) + (is (= (gethash 1 ht) 42)) + (is (null (gethash 0 ht))) + (signals error (gethash "foobar" ht)) + (signals error (setf (gethash "foobar" ht) 15)) + (finishes (remhash 3 ht)) + (is (null (gethash 1 ht))) + (finishes (setf (gethash 55 ht) 0)) + (is (= (gethash 13 ht) 0)))))