Merge branch 'hashtable-improvements' into 'develop'

hash-tables: performance improvements

See merge request embeddable-common-lisp/ecl!269
This commit is contained in:
Daniel Kochmański 2022-03-29 09:50:51 +00:00
commit a4b593c110

View file

@ -276,28 +276,15 @@ static cl_hashkey _hash_generic(cl_object ht, cl_object key) {
}
#define HASH_TABLE_LOOP(hkey,hvalue,h,HASH_TABLE_LOOP_TEST) { \
cl_index hsize = hashtable->hash.size; \
cl_index i = h % hsize, j = hsize, k; \
for (k = 0; k < hsize; i = (i + 1) % hsize, k++) { \
cl_index i, hsize = hashtable->hash.size; \
/* INV: there is at least one empty bucket so that this loop will
* terminate */ \
for (i = h % hsize; ; i = (i + 1) % hsize) { \
struct ecl_hashtable_entry *e = hashtable->hash.data + i; \
cl_object hkey = e->key, hvalue = e->value; \
if (hkey == OBJNULL) { \
if (hvalue == OBJNULL) { \
if (j == hsize) \
return e; \
else \
return hashtable->hash.data + j; \
} else { \
if (j == hsize) \
j = i; \
else if (j == i) \
return e; \
} \
continue; \
} \
if (HASH_TABLE_LOOP_TEST) return hashtable->hash.data + i; \
(void)hvalue; /* silence unused-variable compiler warning */ \
if (hkey == OBJNULL || (HASH_TABLE_LOOP_TEST)) return e; \
} \
return hashtable->hash.data + j; \
}
#define HASH_TABLE_SET(h,loop,compute_key,store_key) { \
@ -318,6 +305,52 @@ AGAIN: \
return hashtable; \
}
/* HASH_TABLE_REMOVE tries to fills up holes generated by deleting
* entries from a hashtable as follows. Iterate through all entries f
* to the right of the deleted entry e (the hole). If the distance
* between f's current and its optimal location is greater than the
* distance between e and f, then we can put f into the hole. Repeat
* with the new hole at the location of f until the holes are all
* filled. */
#define HASH_TABLE_REMOVE(hkey,hvalue,h,HASH_TABLE_LOOP_TEST,compute_key) { \
cl_index i, hsize = hashtable->hash.size; \
/* INV: there is at least one empty bucket so that this loop will
* terminate */ \
for (i = h % hsize; ; i = (i + 1) % hsize) { \
struct ecl_hashtable_entry *e = hashtable->hash.data + i; \
cl_object hkey = e->key, hvalue = e->value; \
(void)hvalue; /* silence unused-variable compiler warning */ \
if (hkey == OBJNULL) return 0; \
if (HASH_TABLE_LOOP_TEST) { \
cl_index j = (i+1) % hsize, k; \
for (k = 1; k <= hsize; j = (j+1) % hsize, k++) { \
struct ecl_hashtable_entry *f = hashtable->hash.data + j; \
hkey = f->key; \
hvalue = f->value; \
if (hkey == OBJNULL) { \
e->key = OBJNULL; \
e->value = OBJNULL; \
break; \
} \
cl_hashkey hf = compute_key; \
cl_index m = hf % hsize; \
/* d: distance of f from the optimal position */ \
cl_index d = (j >= m) ? (j - m) : (j + hsize - m); \
if (k <= d) { \
e->key = hkey; \
e->value = hvalue; \
e = f; \
i = j; \
k = 0; \
} \
} \
hashtable->hash.entries--; \
return 1; \
} \
} \
}
/*
* EQ HASHTABLES
*/
@ -338,27 +371,18 @@ _ecl_gethash_eq(cl_object key, cl_object hashtable, cl_object def)
return (e->key == OBJNULL)? def : e->value;
}
static bool
_ecl_remhash_eq(cl_object key, cl_object hashtable)
{
cl_hashkey h = _hash_eq(key);
struct ecl_hashtable_entry *e = _ecl_hash_loop_eq(h, key, hashtable);
if (e->key == OBJNULL) {
return 0;
} else {
e->key = OBJNULL;
e->value = ECL_NIL;
hashtable->hash.entries--;
return 1;
}
}
static cl_object
_ecl_sethash_eq(cl_object key, cl_object hashtable, cl_object value)
{
HASH_TABLE_SET(h, _ecl_hash_loop_eq, _hash_eq(key), key);
}
static bool
_ecl_remhash_eq(cl_object key, cl_object hashtable)
{
HASH_TABLE_REMOVE(hkey, hvalue, _hash_eq(key), key == hkey, _hash_eq(hkey));
}
/*
* EQL HASHTABLES
*/
@ -386,16 +410,7 @@ _ecl_sethash_eql(cl_object key, cl_object hashtable, cl_object value)
static bool
_ecl_remhash_eql(cl_object key, cl_object hashtable)
{
cl_hashkey h = _hash_eql(0, key);
struct ecl_hashtable_entry *e = _ecl_hash_loop_eql(h, key, hashtable);
if (e->key == OBJNULL) {
return 0;
} else {
e->key = OBJNULL;
e->value = ECL_NIL;
hashtable->hash.entries--;
return 1;
}
HASH_TABLE_REMOVE(hkey, hvalue, _hash_eql(0, key), ecl_eql(key, hkey), _hash_eql(0, hkey));
}
/*
@ -425,16 +440,7 @@ _ecl_sethash_equal(cl_object key, cl_object hashtable, cl_object value)
static bool
_ecl_remhash_equal(cl_object key, cl_object hashtable)
{
cl_hashkey h = _hash_equal(3, 0, key);
struct ecl_hashtable_entry *e = _ecl_hash_loop_equal(h, key, hashtable);
if (e->key == OBJNULL) {
return 0;
} else {
e->key = OBJNULL;
e->value = ECL_NIL;
hashtable->hash.entries--;
return 1;
}
HASH_TABLE_REMOVE(hkey, hvalue, _hash_equal(3, 0, key), ecl_equal(key, hkey), _hash_equal(3, 0, hkey));
}
/*
@ -464,16 +470,7 @@ _ecl_sethash_equalp(cl_object key, cl_object hashtable, cl_object value)
static bool
_ecl_remhash_equalp(cl_object key, cl_object hashtable)
{
cl_hashkey h = _hash_equalp(3, 0, key);
struct ecl_hashtable_entry *e = _ecl_hash_loop_equalp(h, key, hashtable);
if (e->key == OBJNULL) {
return 0;
} else {
e->key = OBJNULL;
e->value = ECL_NIL;
hashtable->hash.entries--;
return 1;
}
HASH_TABLE_REMOVE(hkey, hvalue, _hash_equalp(3, 0, key), ecl_equalp(key, hkey), _hash_equalp(3, 0, hkey));
}
/*
@ -505,15 +502,8 @@ static bool
_ecl_remhash_pack(cl_object key, cl_object hashtable)
{
cl_hashkey h = _hash_equal(3, 0, key);
struct ecl_hashtable_entry *e = _ecl_hash_loop_pack(h, key, hashtable);
if (e->key == OBJNULL) {
return 0;
} else {
e->key = OBJNULL;
e->value = ECL_NIL;
hashtable->hash.entries--;
return 1;
}
cl_object ho = ecl_make_fixnum(h & 0xFFFFFFF);
HASH_TABLE_REMOVE(hkey, hvalue, h, (ho==hkey) && ecl_string_eq(key,SYMBOL_NAME(hvalue)), _hash_equal(3, 0, SYMBOL_NAME(hvalue)));
}
/*
@ -549,16 +539,8 @@ _ecl_sethash_generic(cl_object key, cl_object hashtable, cl_object value)
static bool
_ecl_remhash_generic(cl_object key, cl_object hashtable)
{
cl_hashkey h = _hash_generic(hashtable, key);
struct ecl_hashtable_entry *e = _ecl_hash_loop_generic(h, key, hashtable);
if (e->key == OBJNULL) {
return 0;
} else {
e->key = OBJNULL;
e->value = ECL_NIL;
hashtable->hash.entries--;
return 1;
}
cl_object test_fun = hashtable->hash.generic_test;
HASH_TABLE_REMOVE(hkey, hvalue, _hash_generic(hashtable, key), _ecl_generic_hash_test(test_fun, key, hkey), _hash_generic(hashtable, hkey));
}
/*
@ -658,26 +640,12 @@ static struct ecl_hashtable_entry *
_ecl_weak_hash_loop(cl_hashkey h, cl_object key, cl_object hashtable,
struct ecl_hashtable_entry *aux)
{
cl_index hsize = hashtable->hash.size;
cl_index i = h % hsize, j = hsize, k;
for (k = 0; k < hsize; i = (i + 1) % hsize, k++) {
cl_index i, hsize = hashtable->hash.size;
for (i = h % hsize; ; i = (i + 1) % hsize) {
struct ecl_hashtable_entry *p = hashtable->hash.data + i;
struct ecl_hashtable_entry e = *aux = copy_entry(p, hashtable);
if (e.key == OBJNULL) {
if (e.value == OBJNULL) {
if (j == hsize) {
return p;
} else {
return hashtable->hash.data + j;
}
} else {
if (j == hsize) {
j = i;
} else if (j == i) {
return p;
}
}
continue;
return p;
}
switch (hashtable->hash.test) {
case ecl_htt_eq:
@ -704,7 +672,6 @@ _ecl_weak_hash_loop(cl_hashkey h, cl_object key, cl_object hashtable,
ecl_internal_error("Unknown hash test.");
}
}
return hashtable->hash.data + j;
}
static cl_object
@ -759,17 +726,75 @@ _ecl_sethash_weak(cl_object key, cl_object hashtable, cl_object value)
static bool
_ecl_remhash_weak(cl_object key, cl_object hashtable)
{
cl_index i, hsize = hashtable->hash.size;
cl_hashkey h = _ecl_hash_key(hashtable, key);
struct ecl_hashtable_entry aux[1];
struct ecl_hashtable_entry *e =
_ecl_weak_hash_loop(h, key, hashtable, aux);
if (aux->key != OBJNULL) {
hashtable->hash.entries--;
e->key = OBJNULL;
e->value = ECL_NIL;
return 1;
} else {
return 0;
for (i = h % hsize; ; i = (i + 1) % hsize) {
struct ecl_hashtable_entry *p = hashtable->hash.data + i;
struct ecl_hashtable_entry e = copy_entry(p, hashtable);
if (e.key == OBJNULL) {
return 0;
}
bool found = FALSE;
switch (hashtable->hash.test) {
case ecl_htt_eq:
if (e.key == key)
found = TRUE;
break;
case ecl_htt_eql:
if (ecl_eql(e.key, key))
found = TRUE;
break;
case ecl_htt_equal:
if (ecl_equal(e.key, key))
found = TRUE;
break;
case ecl_htt_equalp:
if (ecl_equalp(e.key, key))
found = TRUE;
break;
case ecl_htt_generic:
if (_ecl_generic_hash_test(hashtable->hash.generic_test, e.key, key))
found = TRUE;
break;
default:
ecl_internal_error("Unknown hash test.");
}
if (found) {
cl_index j = (i+1) % hsize, k;
for (k = 1; k <= hsize; j = (j+1) % hsize, k++) {
struct ecl_hashtable_entry *q = hashtable->hash.data + j;
struct ecl_hashtable_entry f = copy_entry(q, hashtable);
if (f.key == OBJNULL) {
break;
}
cl_hashkey hf = _ecl_hash_key(hashtable, f.value);
cl_index m = hf % hsize;
cl_index d = (j >= m) ? (j - m) : (j + hsize - m);
if (k <= d) {
switch (hashtable->hash.weak) {
case ecl_htt_weak_key:
case ecl_htt_weak_key_and_value:
case ecl_htt_weak_key_or_value:
p->key = si_make_weak_pointer(f.key);
default:
p->key = f.key;
}
switch (hashtable->hash.weak) {
case ecl_htt_weak_value:
case ecl_htt_weak_key_and_value:
case ecl_htt_weak_key_or_value:
p->value = si_make_weak_pointer(f.value);
default:
p->value = f.value;
}
p = q;
i = j;
k = 0;
}
}
hashtable->hash.entries--;
return 1;
}
}
}
#endif
@ -889,6 +914,9 @@ ecl_extend_hashtable(cl_object hashtable)
new->hash.entries = 0;
new->hash.size = new_size;
new->hash.limit = new->hash.size * new->hash.factor;
if (new->hash.limit >= new_size) {
new->hash.limit = new_size - 1;
}
new->hash.data = (struct ecl_hashtable_entry *)
ecl_alloc(new_size * sizeof(struct ecl_hashtable_entry));
for (i = 0; i < new_size; i++) {
@ -1089,6 +1117,9 @@ cl__make_hash_table(cl_object test, cl_object size, cl_object rehash_size,
rehash_threshold = cl_max(2, min_threshold, rehash_threshold);
h->hash.factor = ecl_to_double(rehash_threshold);
h->hash.limit = h->hash.size * h->hash.factor;
if (h->hash.limit >= hsize) {
h->hash.limit = hsize - 1;
}
h->hash.data = NULL; /* for GC sake */
h->hash.data = (struct ecl_hashtable_entry *)
ecl_alloc(hsize * sizeof(struct ecl_hashtable_entry));
@ -1281,27 +1312,54 @@ cl_hash_table_count(cl_object ht)
@(return (ecl_make_fixnum(ecl_hash_table_count(ht))));
}
/* HASH TABLE ITERATION
*
* We iterate from right to left across each group of consecutive
* non-empty buckets. This allows removing the current iteration
* element without iterating over elements twice or missing elements
* because we only change elements to the right of the current
* element when removing an element. For example, a hashtable of
* size 10 with 5 filled buckets is iterated over as follows:
*
* a..bc...de
* ^
* a..bc...de
* ^
* a..bc...de
* ^
* a..bc...de
* ^
* a..bc...de
* ^
*
* If for example the element `e` is removed and the element `a` moves
* up at the place that `e` previously occupied, we don't iterate
* twice over `a`.
*/
static cl_object
si_hash_table_iterate(cl_narg narg, ...)
{
const cl_env_ptr the_env = ecl_process_env();
cl_object env = the_env->function->cclosure.env;
cl_object index = CAR(env);
cl_object ht = CADR(env);
cl_fixnum i;
cl_object endpoint = CADR(env);
cl_object ht = CADDR(env);
cl_fixnum i, j;
if (!Null(index)) {
i = ecl_fixnum(index);
if (i < 0)
i = -1;
for (; ++i < ht->hash.size; ) {
i = ecl_fixnum(endpoint);
j = ecl_fixnum(index);
do {
j = (j == 0) ? ht->hash.size-1 : j - 1;
struct ecl_hashtable_entry e =
copy_entry(ht->hash.data + i, ht);
copy_entry(ht->hash.data + j, ht);
if (e.key != OBJNULL) {
cl_object ndx = ecl_make_fixnum(i);
cl_object ndx = ecl_make_fixnum(j);
ECL_RPLACA(env, ndx);
@(return ndx e.key e.value);
}
}
} while (j != i);
ECL_RPLACA(env, ECL_NIL);
}
@(return ECL_NIL);
@ -1310,9 +1368,13 @@ si_hash_table_iterate(cl_narg narg, ...)
cl_object
si_hash_table_iterator(cl_object ht)
{
cl_fixnum i;
assert_type_hash_table(@[si::hash-table-iterator], 1, ht);
/* Make sure we don't start in the middle of a group of consecutive
* filled buckets. */
for (i = ht->hash.size-1; ht->hash.data[i].key != OBJNULL; i--);
@(return ecl_make_cclosure_va(si_hash_table_iterate,
cl_list(2, ecl_make_fixnum(-1), ht),
cl_list(3, ecl_make_fixnum(i), ecl_make_fixnum(i), ht),
@'si::hash-table-iterator',
0));
}
@ -1372,12 +1434,21 @@ cl_sxhash(cl_object key)
cl_object
cl_maphash(cl_object fun, cl_object ht)
{
cl_index i;
cl_index i, j, hsize;
assert_type_hash_table(@[maphash], 2, ht);
for (i = 0; i < ht->hash.size; i++) {
struct ecl_hashtable_entry e = ht->hash.data[i];
if(e.key != OBJNULL) {
if (ht->hash.entries == 0) {
@(return ECL_NIL);
}
hsize = ht->hash.size;
/* Make sure we don't start in the middle of a group of consecutive
* filled buckets. */
for (i = hsize-1; ht->hash.data[i].key != OBJNULL; i--);
j = i;
do {
j = (j == 0) ? hsize-1 : j - 1;
struct ecl_hashtable_entry e = ht->hash.data[j];
if (e.key != OBJNULL) {
cl_object key = e.key;
cl_object val = e.value;
switch (ht->hash.weak) {
@ -1394,7 +1465,7 @@ cl_maphash(cl_object fun, cl_object ht)
}
funcall(3, fun, key, val);
}
}
} while (j != i);
@(return ECL_NIL);
}