hash-table: add extension for generic predicates

Added:
- implementation
- test
- documentaiton entries

Additionally:
- remove #if 0 code branches (unused clutter)
- bring up-to-date help.lsp file for hints in slime
- wrap synchronized access in unwind protect
- write_ugly did not carry extensions in the printer
This commit is contained in:
Daniel Kochmański 2019-05-23 14:08:11 +02:00
parent aa985f566f
commit 6e5016dcb8
8 changed files with 199 additions and 47 deletions

View file

@ -268,6 +268,15 @@ _hash_equalp(int depth, cl_hashkey h, cl_object x)
}
}
static cl_hashkey _hash_generic(cl_object ht, cl_object key) {
cl_object hash_fun = ht->hash.generic_hash;
cl_object h_object = _ecl_funcall2(hash_fun, key);
if (!ECL_FIXNUMP(h_object) || ecl_fixnum_minusp(h_object)) {
FEwrong_type_argument(@'fixnum', h_object);
}
return ecl_fixnum(h_object);
}
#define HASH_TABLE_LOOP(hkey,hvalue,h,HASH_TABLE_LOOP_TEST) { \
cl_index hsize = hashtable->hash.size; \
cl_index i = h % hsize, j = hsize, k; \
@ -293,9 +302,6 @@ _hash_equalp(int depth, cl_hashkey h, cl_object x)
return hashtable->hash.data + j; \
}
#if 0
#define HASH_TABLE_SET(h,loop,compute_key,store_key)
#else
#define HASH_TABLE_SET(h,loop,compute_key,store_key) { \
cl_hashkey h = compute_key; \
struct ecl_hashtable_entry *e; \
@ -313,17 +319,12 @@ AGAIN: \
e->value = value; \
return hashtable; \
}
#endif
/*
* EQ HASHTABLES
*/
#if 0
#define _hash_eq(k) ((cl_hashkey)(k) ^ ((cl_hashkey)(k) >> 16))
#else
#define _hash_eq(k) ((cl_hashkey)(k) >> 2)
#endif
static struct ecl_hashtable_entry *
_ecl_hash_loop_eq(cl_hashkey h, cl_object key, cl_object hashtable)
@ -517,6 +518,51 @@ _ecl_remhash_pack(cl_object key, cl_object hashtable)
}
}
/*
* Generic HASHTABLES
*/
static bool
_ecl_generic_hash_test(cl_object fun, cl_object key, cl_object hkey) {
return (_ecl_funcall3(fun, key, hkey) != ECL_NIL);
}
static struct ecl_hashtable_entry *
_ecl_hash_loop_generic(cl_hashkey h, cl_object key, cl_object hashtable)
{
cl_object test_fun = hashtable->hash.generic_test;
HASH_TABLE_LOOP(hkey, hvalue, h, _ecl_generic_hash_test(test_fun, key, hkey));
}
static cl_object
_ecl_gethash_generic(cl_object key, cl_object hashtable, cl_object def)
{
cl_hashkey h = _hash_generic(hashtable, key);
struct ecl_hashtable_entry *e = _ecl_hash_loop_generic(h, key, hashtable);
return (e->key == OBJNULL)? def : e->value;
}
static cl_object
_ecl_sethash_generic(cl_object key, cl_object hashtable, cl_object value)
{
HASH_TABLE_SET(h, _ecl_hash_loop_generic, _hash_generic(hashtable, key), key);
}
static bool
_ecl_remhash_generic(cl_object key, cl_object hashtable)
{
cl_hashkey h = _hash_generic(hashtable, key);
struct ecl_hashtable_entry *e = _ecl_hash_loop_generic(h, key, hashtable);
if (e->key == OBJNULL) {
return 0;
} else {
e->key = OBJNULL;
e->value = ECL_NIL;
hashtable->hash.entries--;
return 1;
}
}
/*
* WEAK HASH TABLES
*/
@ -528,11 +574,13 @@ _ecl_remhash_pack(cl_object key, cl_object hashtable)
static cl_hashkey
_ecl_hash_key(cl_object h, cl_object o) {
switch (h->hash.test) {
case ecl_htt_eq: return _hash_eq(o);
case ecl_htt_eql: return _hash_eql(0, o);
case ecl_htt_equal: return _hash_equal(3, 0, o);
case ecl_htt_equalp:
default: return _hash_equalp(3, 0, o);
case ecl_htt_eq: return _hash_eq(o);
case ecl_htt_eql: return _hash_eql(0, o);
case ecl_htt_equal: return _hash_equal(3, 0, o);
case ecl_htt_equalp: return _hash_equalp(3, 0, o);
case ecl_htt_pack: return _hash_equal(3, 0, o);
case ecl_htt_generic:
return _hash_generic(h, o);
}
}
@ -631,14 +679,13 @@ _ecl_weak_hash_loop(cl_hashkey h, cl_object key, cl_object hashtable,
continue;
}
switch (hashtable->hash.test) {
case ecl_htt_eq:
if (e.key == key) return p;
case ecl_htt_eql:
if (ecl_eql(e.key, key)) return p;
case ecl_htt_equal:
if (ecl_equal(e.key, key)) return p;
case ecl_htt_equalp:
if (ecl_equalp(e.key, key)) return p;
case ecl_htt_eq: if (e.key == key) return p;
case ecl_htt_eql: if (ecl_eql(e.key, key)) return p;
case ecl_htt_equal: if (ecl_equal(e.key, key)) return p;
case ecl_htt_equalp: if (ecl_equalp(e.key, key)) return p;
case ecl_htt_generic:
if (_ecl_generic_hash_test(hashtable->hash.generic_test, e.key, key))
return p;
}
}
return hashtable->hash.data + j;
@ -719,8 +766,11 @@ _ecl_sethash_sync(cl_object key, cl_object hashtable, cl_object value)
cl_object output = ECL_NIL;
cl_object sync_lock = hashtable->hash.sync_lock;
mp_get_rwlock_write_wait(sync_lock);
output = hashtable->hash.set_unsafe(key, hashtable, value);
mp_giveup_rwlock_write(sync_lock);
ECL_UNWIND_PROTECT_BEGIN(ecl_process_env()) {
output = hashtable->hash.set_unsafe(key, hashtable, value);
} ECL_UNWIND_PROTECT_THREAD_SAFE_EXIT {
mp_giveup_rwlock_write(sync_lock);
} ECL_UNWIND_PROTECT_THREAD_SAFE_END;
return output;
}
@ -730,8 +780,11 @@ _ecl_gethash_sync(cl_object key, cl_object hashtable, cl_object def)
cl_object output = ECL_NIL;
cl_object sync_lock = hashtable->hash.sync_lock;
mp_get_rwlock_read_wait(sync_lock);
output = hashtable->hash.get_unsafe(key, hashtable, def);
mp_giveup_rwlock_read(sync_lock);
ECL_UNWIND_PROTECT_BEGIN(ecl_process_env()) {
output = hashtable->hash.get_unsafe(key, hashtable, def);
} ECL_UNWIND_PROTECT_THREAD_SAFE_EXIT {
mp_giveup_rwlock_read(sync_lock);
} ECL_UNWIND_PROTECT_THREAD_SAFE_END;
return output;
}
@ -741,8 +794,11 @@ _ecl_remhash_sync(cl_object key, cl_object hashtable)
bool output = 0;
cl_object sync_lock = hashtable->hash.sync_lock;
mp_get_rwlock_write_wait(sync_lock);
output = hashtable->hash.rem_unsafe(key, hashtable);
mp_giveup_rwlock_write(sync_lock);
ECL_UNWIND_PROTECT_BEGIN(ecl_process_env()) {
output = hashtable->hash.rem_unsafe(key, hashtable);
} ECL_UNWIND_PROTECT_THREAD_SAFE_EXIT {
mp_giveup_rwlock_write(sync_lock);
} ECL_UNWIND_PROTECT_THREAD_SAFE_END;
return output;
}
#endif
@ -836,6 +892,7 @@ ecl_extend_hashtable(cl_object hashtable)
}
@(defun make_hash_table (&key (test @'eql')
(hash_function ECL_NIL)
(weakness ECL_NIL)
(synchronized ECL_NIL)
(size ecl_make_fixnum(1024))
@ -843,6 +900,17 @@ ecl_extend_hashtable(cl_object hashtable)
(rehash_threshold cl_core.rehash_threshold))
@ {
cl_object hash = cl__make_hash_table(test, size, rehash_size, rehash_threshold);
if (hash->hash.test == ecl_htt_generic) {
/* Normally we would make hash_function an argument to
cl__make_hash_table and put this test in there and void
unnecessary object allocation, but we do not want to
compromise the API. -- jd 2019-05-23 */
if (hash_function == ECL_NIL) {
FEerror("~S is an illegal hash-table test function.", 1, test);
}
hash_function = si_coerce_to_function(hash_function);
hash->hash.generic_hash = hash_function;
}
#ifdef ECL_WEAK_HASH
if (!Null(weakness)) {
if (weakness == @':key') {
@ -910,6 +978,7 @@ cl__make_hash_table(cl_object test, cl_object size, cl_object rehash_size,
int htt;
cl_index hsize;
cl_object h;
cl_object hash_test = ECL_NIL, hash_func = ECL_NIL;
cl_object (*get)(cl_object, cl_object, cl_object);
cl_object (*set)(cl_object, cl_object, cl_object);
bool (*rem)(cl_object, cl_object);
@ -942,8 +1011,11 @@ cl__make_hash_table(cl_object test, cl_object size, cl_object rehash_size,
set = _ecl_sethash_pack;
rem = _ecl_remhash_pack;
} else {
FEerror("~S is an illegal hash-table test function.",
1, test);
htt = ecl_htt_generic;
get = _ecl_gethash_generic;
set = _ecl_sethash_generic;
rem = _ecl_remhash_generic;
hash_test = si_coerce_to_function(test);
}
if (ecl_unlikely(!ECL_FIXNUMP(size) ||
ecl_fixnum_minusp(size) ||
@ -989,6 +1061,8 @@ cl__make_hash_table(cl_object test, cl_object size, cl_object rehash_size,
h = ecl_alloc_object(t_hashtable);
h->hash.test = htt;
h->hash.weak = ecl_htt_not_weak;
h->hash.generic_test = hash_test;
h->hash.generic_hash = hash_func;
h->hash.get = get;
h->hash.set = set;
h->hash.rem = rem;
@ -1035,6 +1109,11 @@ ecl_reconstruct_serialized_hashtable(cl_object h) {
h->hash.set = _ecl_sethash_pack;
h->hash.rem = _ecl_remhash_pack;
break;
case ecl_htt_generic:
h->hash.get = _ecl_gethash_generic;
h->hash.set = _ecl_sethash_generic;
h->hash.rem = _ecl_remhash_generic;
break;
}
if (h->hash.weak != ecl_htt_not_weak) {
h->hash.get = _ecl_gethash_weak;
@ -1139,12 +1218,13 @@ cl_hash_table_test(cl_object ht)
cl_object output;
assert_type_hash_table(@[hash-table-test], 1, ht);
switch(ht->hash.test) {
case ecl_htt_eq: output = @'eq'; break;
case ecl_htt_eql: output = @'eql'; break;
case ecl_htt_equal: output = @'equal'; break;
case ecl_htt_equalp: output = @'equalp'; break;
case ecl_htt_pack:
default: output = @'equal';
case ecl_htt_eq: output = @'eq'; break;
case ecl_htt_eql: output = @'eql'; break;
case ecl_htt_equal: output = @'equal'; break;
case ecl_htt_equalp: output = @'equalp'; break;
case ecl_htt_pack: output = @'equal'; break;
case ecl_htt_generic: output = ht->hash.generic_test;
default: FEerror("hash-table-test: unknown test.", 0);
}
@(return output);
}
@ -1322,6 +1402,7 @@ si_copy_hash_table(cl_object orig)
cl_hash_table_size(orig),
cl_hash_table_rehash_size(orig),
cl_hash_table_rehash_threshold(orig));
hash->hash.generic_hash = orig->hash.generic_hash,
memcpy(hash->hash.data, orig->hash.data,
orig->hash.size * sizeof(*orig->hash.data));
hash->hash.entries = orig->hash.entries;

View file

@ -176,8 +176,11 @@ write_hashtable(cl_object x, cl_object stream)
{
if (ecl_print_readably() && !Null(ecl_symbol_value(@'*read-eval*'))) {
cl_object make =
cl_list(9, @'make-hash-table',
cl_list(15, @'make-hash-table',
@':size', cl_hash_table_size(x),
@':synchronized', si_hash_table_synchronized_p(x),
@':weakness', si_hash_table_weakness(x),
@':hash-function', x->hash.generic_hash,
@':rehash-size', cl_hash_table_rehash_size(x),
@':rehash-threshold', cl_hash_table_rehash_threshold(x),
@':test', cl_list(2, @'quote', cl_hash_table_test(x)));

View file

@ -1368,6 +1368,7 @@ cl_symbols[] = {
{KEY_ "FORMAT-CONTROL", KEYWORD, NULL, -1, OBJNULL},
{KEY_ "FUNCTION", KEYWORD, NULL, -1, OBJNULL},
{KEY_ "GENSYM", KEYWORD, NULL, -1, OBJNULL},
{KEY_ "HASH-FUNCTION", KEYWORD, NULL, -1, OBJNULL},
{KEY_ "HOST", KEYWORD, NULL, -1, OBJNULL},
{KEY_ "IF-DOES-NOT-EXIST", KEYWORD, NULL, -1, OBJNULL},
{KEY_ "IF-ERROR-EXISTS", KEYWORD, NULL, -1, OBJNULL},

View file

@ -1368,6 +1368,7 @@ cl_symbols[] = {
{KEY_ "FORMAT-CONTROL",NULL},
{KEY_ "FUNCTION",NULL},
{KEY_ "GENSYM",NULL},
{KEY_ "HASH-FUNCTION",NULL},
{KEY_ "HOST",NULL},
{KEY_ "IF-DOES-NOT-EXIST",NULL},
{KEY_ "IF-ERROR-EXISTS",NULL},

View file

@ -1961,17 +1961,36 @@ An echo stream is notated as
#<echo stream n>
where N is a number that identifies the stream.")
(docfun make-hash-table function (&key (test 'eql) (size 1024) (rehash-size 1.5) (rehash-threshold 0.7)) "
(docfun make-hash-table function (&key (test 'eql) (size 1024) (rehash-size 1.5) (rehash-threshold 0.7) (hash-function nil) (synchronized nil) (weakness nil)) "
Creates and returns a hash-table.
TEST specifies which predicate should be used to access hash-table entries.
It must be EQ, EQL, or EQUAL. SIZE specifies the number of entries in the
hash-table. REHASH-SIZE, if an integer, specifies how many entries should be
added when the hash-table becomes 'almost full'. REHASH-SIZE, if a float,
specifies the ratio of the new size and the old size. REHASH-THRESHOLD
specifies when to expand the hash-table. If an integer, the hash-table is
expanded when REHASH-THRESHOLD / REHASH-SIZE entries have been used. If a
float, the hash-table is expanded when REHASH-THRESHOLD times the whole
entries have been used.")
TEST specifies which predicate should be used to access hash-table
entries. It must be EQ, EQL, EQUAL, EQUALP or a function accepting
two arguments. If it is a function then HASH-FUNCTION must be
supplied.
HASH-FUNCTION is used alongside with a custom TEST predicate. It
accepts one argument and must return a positive fixnum being the
object's hash.
SIZE specifies the number of entries in the hash-table.
REHASH-SIZE, if an integer, specifies how many entries should be added
when the hash-table becomes 'almost full'. REHASH-SIZE, if a float,
specifies the ratio of the new size and the old size.
REHASH-THRESHOLD specifies when to expand the hash-table. If an
integer, the hash-table is expanded when REHASH-THRESHOLD /
REHASH-SIZE entries have been used. If a float, the hash-table is
expanded when REHASH-THRESHOLD times the whole entries have been used.
SYNCHRONIZE if T then gethash, (setf gethash) and remhash operations
are protected by a lock - in this case hash tables may be used from
different threads without explicit synchronization.
WEAKNESS is a GC extension and may be one of NIL, :KEY, :VALUE,
:KEY-AND-VALUE or :KEY-OR-VALUE. ")
(docfun make-list function (length &key (initial-element nil)) "
Creates and returns a list of the specified LENGTH, whose elements are all the

View file

@ -63,6 +63,16 @@ table may have some content already, but conflicting keys will be
overwritten.
@end deffn
@subsubsection Custom equivalence predicate
@cindex Hash table generic test
@code{make-hash-table} may accept arbitrary @code{:test} keyword for
the equivalence predicate. If it is not one of the standard predicates
(@code{:eq}, @code{:eql}, @code{:equal}, @code{:equalp}) a keyword
argument @code{:hashing-function} must be a function accepting one
argument and returning a positive fixnum. Otherwise the argument is
ignored.
@subsubsection Example
@exindex Hash table extensions example
@lisp

View file

@ -378,7 +378,8 @@ enum ecl_httest { /* hash table key test function */
ecl_htt_eql, /* eql */
ecl_htt_equal, /* equal */
ecl_htt_equalp, /* equalp */
ecl_htt_pack /* symbol hash */
ecl_htt_pack, /* symbol hash */
ecl_htt_generic /* generic user-supplied test */
};
enum ecl_htweak {
@ -398,6 +399,8 @@ struct ecl_hashtable { /* hash table header */
_ECL_HDR2(test,weak);
struct ecl_hashtable_entry *data; /* pointer to the hash table */
cl_object sync_lock; /* synchronization lock */
cl_object generic_test; /* generic test function */
cl_object generic_hash; /* generic hashing function */
cl_index entries; /* number of entries */
cl_index size; /* hash table size */
cl_index limit; /* hash table threshold (integer value) */

View file

@ -76,3 +76,37 @@
(is (= 3 (gethash :foo ht)))
(is-true (remhash :bar ht))
(is (= 1 (hash-table-count ht)))))
;;; generic test and hash functions
;;; In this test we provide an equality predicate which distinguishes
;;; only two types of numbers: odd and even. HT is synchronized
;;; because we want also to check, if lock is not hogged by errors
;;; inside our function (we pass string for that purpose).
(test hash-tables.custom
(flet ((not-so-fancy-equals (x y)
(if (zerop x)
(= x y)
(eql (evenp x) (evenp y))))
(not-so-fancy-hash (x)
(cond ((zerop x) 0)
((evenp x) 1)
(T 2))))
(signals error (make-hash-table :test #'not-so-fancy-equals))
(let ((ht (make-hash-table :test #'not-so-fancy-equals
:hash-function #'not-so-fancy-hash
:synchronized t)))
(finishes
(setf (gethash 13 ht) 42
(gethash 12 ht) 33
(gethash 10 ht) 55))
(is (= (gethash 12 ht) 55))
(is (= (gethash 1 ht) 42))
(is (null (gethash 0 ht)))
(signals error (gethash "foobar" ht))
(signals error (setf (gethash "foobar" ht) 15))
(finishes (remhash 3 ht))
(is (null (gethash 1 ht)))
(finishes (setf (gethash 55 ht) 0))
(is (= (gethash 13 ht) 0)))))