mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2026-01-26 14:32:11 -08:00
Merge branch 'hashtable-improvements' into 'develop'
hash-tables: performance improvements See merge request embeddable-common-lisp/ecl!269
This commit is contained in:
commit
a4b593c110
1 changed files with 197 additions and 126 deletions
323
src/c/hash.d
323
src/c/hash.d
|
|
@ -276,28 +276,15 @@ static cl_hashkey _hash_generic(cl_object ht, cl_object key) {
|
|||
}
|
||||
|
||||
#define HASH_TABLE_LOOP(hkey,hvalue,h,HASH_TABLE_LOOP_TEST) { \
|
||||
cl_index hsize = hashtable->hash.size; \
|
||||
cl_index i = h % hsize, j = hsize, k; \
|
||||
for (k = 0; k < hsize; i = (i + 1) % hsize, k++) { \
|
||||
cl_index i, hsize = hashtable->hash.size; \
|
||||
/* INV: there is at least one empty bucket so that this loop will
|
||||
* terminate */ \
|
||||
for (i = h % hsize; ; i = (i + 1) % hsize) { \
|
||||
struct ecl_hashtable_entry *e = hashtable->hash.data + i; \
|
||||
cl_object hkey = e->key, hvalue = e->value; \
|
||||
if (hkey == OBJNULL) { \
|
||||
if (hvalue == OBJNULL) { \
|
||||
if (j == hsize) \
|
||||
return e; \
|
||||
else \
|
||||
return hashtable->hash.data + j; \
|
||||
} else { \
|
||||
if (j == hsize) \
|
||||
j = i; \
|
||||
else if (j == i) \
|
||||
return e; \
|
||||
} \
|
||||
continue; \
|
||||
} \
|
||||
if (HASH_TABLE_LOOP_TEST) return hashtable->hash.data + i; \
|
||||
(void)hvalue; /* silence unused-variable compiler warning */ \
|
||||
if (hkey == OBJNULL || (HASH_TABLE_LOOP_TEST)) return e; \
|
||||
} \
|
||||
return hashtable->hash.data + j; \
|
||||
}
|
||||
|
||||
#define HASH_TABLE_SET(h,loop,compute_key,store_key) { \
|
||||
|
|
@ -318,6 +305,52 @@ AGAIN: \
|
|||
return hashtable; \
|
||||
}
|
||||
|
||||
/* HASH_TABLE_REMOVE tries to fills up holes generated by deleting
|
||||
* entries from a hashtable as follows. Iterate through all entries f
|
||||
* to the right of the deleted entry e (the hole). If the distance
|
||||
* between f's current and its optimal location is greater than the
|
||||
* distance between e and f, then we can put f into the hole. Repeat
|
||||
* with the new hole at the location of f until the holes are all
|
||||
* filled. */
|
||||
|
||||
#define HASH_TABLE_REMOVE(hkey,hvalue,h,HASH_TABLE_LOOP_TEST,compute_key) { \
|
||||
cl_index i, hsize = hashtable->hash.size; \
|
||||
/* INV: there is at least one empty bucket so that this loop will
|
||||
* terminate */ \
|
||||
for (i = h % hsize; ; i = (i + 1) % hsize) { \
|
||||
struct ecl_hashtable_entry *e = hashtable->hash.data + i; \
|
||||
cl_object hkey = e->key, hvalue = e->value; \
|
||||
(void)hvalue; /* silence unused-variable compiler warning */ \
|
||||
if (hkey == OBJNULL) return 0; \
|
||||
if (HASH_TABLE_LOOP_TEST) { \
|
||||
cl_index j = (i+1) % hsize, k; \
|
||||
for (k = 1; k <= hsize; j = (j+1) % hsize, k++) { \
|
||||
struct ecl_hashtable_entry *f = hashtable->hash.data + j; \
|
||||
hkey = f->key; \
|
||||
hvalue = f->value; \
|
||||
if (hkey == OBJNULL) { \
|
||||
e->key = OBJNULL; \
|
||||
e->value = OBJNULL; \
|
||||
break; \
|
||||
} \
|
||||
cl_hashkey hf = compute_key; \
|
||||
cl_index m = hf % hsize; \
|
||||
/* d: distance of f from the optimal position */ \
|
||||
cl_index d = (j >= m) ? (j - m) : (j + hsize - m); \
|
||||
if (k <= d) { \
|
||||
e->key = hkey; \
|
||||
e->value = hvalue; \
|
||||
e = f; \
|
||||
i = j; \
|
||||
k = 0; \
|
||||
} \
|
||||
} \
|
||||
hashtable->hash.entries--; \
|
||||
return 1; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
/*
|
||||
* EQ HASHTABLES
|
||||
*/
|
||||
|
|
@ -338,27 +371,18 @@ _ecl_gethash_eq(cl_object key, cl_object hashtable, cl_object def)
|
|||
return (e->key == OBJNULL)? def : e->value;
|
||||
}
|
||||
|
||||
static bool
|
||||
_ecl_remhash_eq(cl_object key, cl_object hashtable)
|
||||
{
|
||||
cl_hashkey h = _hash_eq(key);
|
||||
struct ecl_hashtable_entry *e = _ecl_hash_loop_eq(h, key, hashtable);
|
||||
if (e->key == OBJNULL) {
|
||||
return 0;
|
||||
} else {
|
||||
e->key = OBJNULL;
|
||||
e->value = ECL_NIL;
|
||||
hashtable->hash.entries--;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static cl_object
|
||||
_ecl_sethash_eq(cl_object key, cl_object hashtable, cl_object value)
|
||||
{
|
||||
HASH_TABLE_SET(h, _ecl_hash_loop_eq, _hash_eq(key), key);
|
||||
}
|
||||
|
||||
static bool
|
||||
_ecl_remhash_eq(cl_object key, cl_object hashtable)
|
||||
{
|
||||
HASH_TABLE_REMOVE(hkey, hvalue, _hash_eq(key), key == hkey, _hash_eq(hkey));
|
||||
}
|
||||
|
||||
/*
|
||||
* EQL HASHTABLES
|
||||
*/
|
||||
|
|
@ -386,16 +410,7 @@ _ecl_sethash_eql(cl_object key, cl_object hashtable, cl_object value)
|
|||
static bool
|
||||
_ecl_remhash_eql(cl_object key, cl_object hashtable)
|
||||
{
|
||||
cl_hashkey h = _hash_eql(0, key);
|
||||
struct ecl_hashtable_entry *e = _ecl_hash_loop_eql(h, key, hashtable);
|
||||
if (e->key == OBJNULL) {
|
||||
return 0;
|
||||
} else {
|
||||
e->key = OBJNULL;
|
||||
e->value = ECL_NIL;
|
||||
hashtable->hash.entries--;
|
||||
return 1;
|
||||
}
|
||||
HASH_TABLE_REMOVE(hkey, hvalue, _hash_eql(0, key), ecl_eql(key, hkey), _hash_eql(0, hkey));
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -425,16 +440,7 @@ _ecl_sethash_equal(cl_object key, cl_object hashtable, cl_object value)
|
|||
static bool
|
||||
_ecl_remhash_equal(cl_object key, cl_object hashtable)
|
||||
{
|
||||
cl_hashkey h = _hash_equal(3, 0, key);
|
||||
struct ecl_hashtable_entry *e = _ecl_hash_loop_equal(h, key, hashtable);
|
||||
if (e->key == OBJNULL) {
|
||||
return 0;
|
||||
} else {
|
||||
e->key = OBJNULL;
|
||||
e->value = ECL_NIL;
|
||||
hashtable->hash.entries--;
|
||||
return 1;
|
||||
}
|
||||
HASH_TABLE_REMOVE(hkey, hvalue, _hash_equal(3, 0, key), ecl_equal(key, hkey), _hash_equal(3, 0, hkey));
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -464,16 +470,7 @@ _ecl_sethash_equalp(cl_object key, cl_object hashtable, cl_object value)
|
|||
static bool
|
||||
_ecl_remhash_equalp(cl_object key, cl_object hashtable)
|
||||
{
|
||||
cl_hashkey h = _hash_equalp(3, 0, key);
|
||||
struct ecl_hashtable_entry *e = _ecl_hash_loop_equalp(h, key, hashtable);
|
||||
if (e->key == OBJNULL) {
|
||||
return 0;
|
||||
} else {
|
||||
e->key = OBJNULL;
|
||||
e->value = ECL_NIL;
|
||||
hashtable->hash.entries--;
|
||||
return 1;
|
||||
}
|
||||
HASH_TABLE_REMOVE(hkey, hvalue, _hash_equalp(3, 0, key), ecl_equalp(key, hkey), _hash_equalp(3, 0, hkey));
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -505,15 +502,8 @@ static bool
|
|||
_ecl_remhash_pack(cl_object key, cl_object hashtable)
|
||||
{
|
||||
cl_hashkey h = _hash_equal(3, 0, key);
|
||||
struct ecl_hashtable_entry *e = _ecl_hash_loop_pack(h, key, hashtable);
|
||||
if (e->key == OBJNULL) {
|
||||
return 0;
|
||||
} else {
|
||||
e->key = OBJNULL;
|
||||
e->value = ECL_NIL;
|
||||
hashtable->hash.entries--;
|
||||
return 1;
|
||||
}
|
||||
cl_object ho = ecl_make_fixnum(h & 0xFFFFFFF);
|
||||
HASH_TABLE_REMOVE(hkey, hvalue, h, (ho==hkey) && ecl_string_eq(key,SYMBOL_NAME(hvalue)), _hash_equal(3, 0, SYMBOL_NAME(hvalue)));
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -549,16 +539,8 @@ _ecl_sethash_generic(cl_object key, cl_object hashtable, cl_object value)
|
|||
static bool
|
||||
_ecl_remhash_generic(cl_object key, cl_object hashtable)
|
||||
{
|
||||
cl_hashkey h = _hash_generic(hashtable, key);
|
||||
struct ecl_hashtable_entry *e = _ecl_hash_loop_generic(h, key, hashtable);
|
||||
if (e->key == OBJNULL) {
|
||||
return 0;
|
||||
} else {
|
||||
e->key = OBJNULL;
|
||||
e->value = ECL_NIL;
|
||||
hashtable->hash.entries--;
|
||||
return 1;
|
||||
}
|
||||
cl_object test_fun = hashtable->hash.generic_test;
|
||||
HASH_TABLE_REMOVE(hkey, hvalue, _hash_generic(hashtable, key), _ecl_generic_hash_test(test_fun, key, hkey), _hash_generic(hashtable, hkey));
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -658,26 +640,12 @@ static struct ecl_hashtable_entry *
|
|||
_ecl_weak_hash_loop(cl_hashkey h, cl_object key, cl_object hashtable,
|
||||
struct ecl_hashtable_entry *aux)
|
||||
{
|
||||
cl_index hsize = hashtable->hash.size;
|
||||
cl_index i = h % hsize, j = hsize, k;
|
||||
for (k = 0; k < hsize; i = (i + 1) % hsize, k++) {
|
||||
cl_index i, hsize = hashtable->hash.size;
|
||||
for (i = h % hsize; ; i = (i + 1) % hsize) {
|
||||
struct ecl_hashtable_entry *p = hashtable->hash.data + i;
|
||||
struct ecl_hashtable_entry e = *aux = copy_entry(p, hashtable);
|
||||
if (e.key == OBJNULL) {
|
||||
if (e.value == OBJNULL) {
|
||||
if (j == hsize) {
|
||||
return p;
|
||||
} else {
|
||||
return hashtable->hash.data + j;
|
||||
}
|
||||
} else {
|
||||
if (j == hsize) {
|
||||
j = i;
|
||||
} else if (j == i) {
|
||||
return p;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
return p;
|
||||
}
|
||||
switch (hashtable->hash.test) {
|
||||
case ecl_htt_eq:
|
||||
|
|
@ -704,7 +672,6 @@ _ecl_weak_hash_loop(cl_hashkey h, cl_object key, cl_object hashtable,
|
|||
ecl_internal_error("Unknown hash test.");
|
||||
}
|
||||
}
|
||||
return hashtable->hash.data + j;
|
||||
}
|
||||
|
||||
static cl_object
|
||||
|
|
@ -759,17 +726,75 @@ _ecl_sethash_weak(cl_object key, cl_object hashtable, cl_object value)
|
|||
static bool
|
||||
_ecl_remhash_weak(cl_object key, cl_object hashtable)
|
||||
{
|
||||
cl_index i, hsize = hashtable->hash.size;
|
||||
cl_hashkey h = _ecl_hash_key(hashtable, key);
|
||||
struct ecl_hashtable_entry aux[1];
|
||||
struct ecl_hashtable_entry *e =
|
||||
_ecl_weak_hash_loop(h, key, hashtable, aux);
|
||||
if (aux->key != OBJNULL) {
|
||||
hashtable->hash.entries--;
|
||||
e->key = OBJNULL;
|
||||
e->value = ECL_NIL;
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
for (i = h % hsize; ; i = (i + 1) % hsize) {
|
||||
struct ecl_hashtable_entry *p = hashtable->hash.data + i;
|
||||
struct ecl_hashtable_entry e = copy_entry(p, hashtable);
|
||||
if (e.key == OBJNULL) {
|
||||
return 0;
|
||||
}
|
||||
bool found = FALSE;
|
||||
switch (hashtable->hash.test) {
|
||||
case ecl_htt_eq:
|
||||
if (e.key == key)
|
||||
found = TRUE;
|
||||
break;
|
||||
case ecl_htt_eql:
|
||||
if (ecl_eql(e.key, key))
|
||||
found = TRUE;
|
||||
break;
|
||||
case ecl_htt_equal:
|
||||
if (ecl_equal(e.key, key))
|
||||
found = TRUE;
|
||||
break;
|
||||
case ecl_htt_equalp:
|
||||
if (ecl_equalp(e.key, key))
|
||||
found = TRUE;
|
||||
break;
|
||||
case ecl_htt_generic:
|
||||
if (_ecl_generic_hash_test(hashtable->hash.generic_test, e.key, key))
|
||||
found = TRUE;
|
||||
break;
|
||||
default:
|
||||
ecl_internal_error("Unknown hash test.");
|
||||
}
|
||||
if (found) {
|
||||
cl_index j = (i+1) % hsize, k;
|
||||
for (k = 1; k <= hsize; j = (j+1) % hsize, k++) {
|
||||
struct ecl_hashtable_entry *q = hashtable->hash.data + j;
|
||||
struct ecl_hashtable_entry f = copy_entry(q, hashtable);
|
||||
if (f.key == OBJNULL) {
|
||||
break;
|
||||
}
|
||||
cl_hashkey hf = _ecl_hash_key(hashtable, f.value);
|
||||
cl_index m = hf % hsize;
|
||||
cl_index d = (j >= m) ? (j - m) : (j + hsize - m);
|
||||
if (k <= d) {
|
||||
switch (hashtable->hash.weak) {
|
||||
case ecl_htt_weak_key:
|
||||
case ecl_htt_weak_key_and_value:
|
||||
case ecl_htt_weak_key_or_value:
|
||||
p->key = si_make_weak_pointer(f.key);
|
||||
default:
|
||||
p->key = f.key;
|
||||
}
|
||||
switch (hashtable->hash.weak) {
|
||||
case ecl_htt_weak_value:
|
||||
case ecl_htt_weak_key_and_value:
|
||||
case ecl_htt_weak_key_or_value:
|
||||
p->value = si_make_weak_pointer(f.value);
|
||||
default:
|
||||
p->value = f.value;
|
||||
}
|
||||
p = q;
|
||||
i = j;
|
||||
k = 0;
|
||||
}
|
||||
}
|
||||
hashtable->hash.entries--;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -889,6 +914,9 @@ ecl_extend_hashtable(cl_object hashtable)
|
|||
new->hash.entries = 0;
|
||||
new->hash.size = new_size;
|
||||
new->hash.limit = new->hash.size * new->hash.factor;
|
||||
if (new->hash.limit >= new_size) {
|
||||
new->hash.limit = new_size - 1;
|
||||
}
|
||||
new->hash.data = (struct ecl_hashtable_entry *)
|
||||
ecl_alloc(new_size * sizeof(struct ecl_hashtable_entry));
|
||||
for (i = 0; i < new_size; i++) {
|
||||
|
|
@ -1089,6 +1117,9 @@ cl__make_hash_table(cl_object test, cl_object size, cl_object rehash_size,
|
|||
rehash_threshold = cl_max(2, min_threshold, rehash_threshold);
|
||||
h->hash.factor = ecl_to_double(rehash_threshold);
|
||||
h->hash.limit = h->hash.size * h->hash.factor;
|
||||
if (h->hash.limit >= hsize) {
|
||||
h->hash.limit = hsize - 1;
|
||||
}
|
||||
h->hash.data = NULL; /* for GC sake */
|
||||
h->hash.data = (struct ecl_hashtable_entry *)
|
||||
ecl_alloc(hsize * sizeof(struct ecl_hashtable_entry));
|
||||
|
|
@ -1281,27 +1312,54 @@ cl_hash_table_count(cl_object ht)
|
|||
@(return (ecl_make_fixnum(ecl_hash_table_count(ht))));
|
||||
}
|
||||
|
||||
|
||||
/* HASH TABLE ITERATION
|
||||
*
|
||||
* We iterate from right to left across each group of consecutive
|
||||
* non-empty buckets. This allows removing the current iteration
|
||||
* element without iterating over elements twice or missing elements
|
||||
* because we only change elements to the right of the current
|
||||
* element when removing an element. For example, a hashtable of
|
||||
* size 10 with 5 filled buckets is iterated over as follows:
|
||||
*
|
||||
* a..bc...de
|
||||
* ^
|
||||
* a..bc...de
|
||||
* ^
|
||||
* a..bc...de
|
||||
* ^
|
||||
* a..bc...de
|
||||
* ^
|
||||
* a..bc...de
|
||||
* ^
|
||||
*
|
||||
* If for example the element `e` is removed and the element `a` moves
|
||||
* up at the place that `e` previously occupied, we don't iterate
|
||||
* twice over `a`.
|
||||
*/
|
||||
|
||||
static cl_object
|
||||
si_hash_table_iterate(cl_narg narg, ...)
|
||||
{
|
||||
const cl_env_ptr the_env = ecl_process_env();
|
||||
cl_object env = the_env->function->cclosure.env;
|
||||
cl_object index = CAR(env);
|
||||
cl_object ht = CADR(env);
|
||||
cl_fixnum i;
|
||||
cl_object endpoint = CADR(env);
|
||||
cl_object ht = CADDR(env);
|
||||
cl_fixnum i, j;
|
||||
if (!Null(index)) {
|
||||
i = ecl_fixnum(index);
|
||||
if (i < 0)
|
||||
i = -1;
|
||||
for (; ++i < ht->hash.size; ) {
|
||||
i = ecl_fixnum(endpoint);
|
||||
j = ecl_fixnum(index);
|
||||
do {
|
||||
j = (j == 0) ? ht->hash.size-1 : j - 1;
|
||||
struct ecl_hashtable_entry e =
|
||||
copy_entry(ht->hash.data + i, ht);
|
||||
copy_entry(ht->hash.data + j, ht);
|
||||
if (e.key != OBJNULL) {
|
||||
cl_object ndx = ecl_make_fixnum(i);
|
||||
cl_object ndx = ecl_make_fixnum(j);
|
||||
ECL_RPLACA(env, ndx);
|
||||
@(return ndx e.key e.value);
|
||||
}
|
||||
}
|
||||
} while (j != i);
|
||||
ECL_RPLACA(env, ECL_NIL);
|
||||
}
|
||||
@(return ECL_NIL);
|
||||
|
|
@ -1310,9 +1368,13 @@ si_hash_table_iterate(cl_narg narg, ...)
|
|||
cl_object
|
||||
si_hash_table_iterator(cl_object ht)
|
||||
{
|
||||
cl_fixnum i;
|
||||
assert_type_hash_table(@[si::hash-table-iterator], 1, ht);
|
||||
/* Make sure we don't start in the middle of a group of consecutive
|
||||
* filled buckets. */
|
||||
for (i = ht->hash.size-1; ht->hash.data[i].key != OBJNULL; i--);
|
||||
@(return ecl_make_cclosure_va(si_hash_table_iterate,
|
||||
cl_list(2, ecl_make_fixnum(-1), ht),
|
||||
cl_list(3, ecl_make_fixnum(i), ecl_make_fixnum(i), ht),
|
||||
@'si::hash-table-iterator',
|
||||
0));
|
||||
}
|
||||
|
|
@ -1372,12 +1434,21 @@ cl_sxhash(cl_object key)
|
|||
cl_object
|
||||
cl_maphash(cl_object fun, cl_object ht)
|
||||
{
|
||||
cl_index i;
|
||||
cl_index i, j, hsize;
|
||||
|
||||
assert_type_hash_table(@[maphash], 2, ht);
|
||||
for (i = 0; i < ht->hash.size; i++) {
|
||||
struct ecl_hashtable_entry e = ht->hash.data[i];
|
||||
if(e.key != OBJNULL) {
|
||||
if (ht->hash.entries == 0) {
|
||||
@(return ECL_NIL);
|
||||
}
|
||||
hsize = ht->hash.size;
|
||||
/* Make sure we don't start in the middle of a group of consecutive
|
||||
* filled buckets. */
|
||||
for (i = hsize-1; ht->hash.data[i].key != OBJNULL; i--);
|
||||
j = i;
|
||||
do {
|
||||
j = (j == 0) ? hsize-1 : j - 1;
|
||||
struct ecl_hashtable_entry e = ht->hash.data[j];
|
||||
if (e.key != OBJNULL) {
|
||||
cl_object key = e.key;
|
||||
cl_object val = e.value;
|
||||
switch (ht->hash.weak) {
|
||||
|
|
@ -1394,7 +1465,7 @@ cl_maphash(cl_object fun, cl_object ht)
|
|||
}
|
||||
funcall(3, fun, key, val);
|
||||
}
|
||||
}
|
||||
} while (j != i);
|
||||
@(return ECL_NIL);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue