diff --git a/src/c/hash.d b/src/c/hash.d index c42c61690..c7dbb8c2e 100644 --- a/src/c/hash.d +++ b/src/c/hash.d @@ -276,28 +276,15 @@ static cl_hashkey _hash_generic(cl_object ht, cl_object key) { } #define HASH_TABLE_LOOP(hkey,hvalue,h,HASH_TABLE_LOOP_TEST) { \ - cl_index hsize = hashtable->hash.size; \ - cl_index i = h % hsize, j = hsize, k; \ - for (k = 0; k < hsize; i = (i + 1) % hsize, k++) { \ + cl_index i, hsize = hashtable->hash.size; \ + /* INV: there is at least one empty bucket so that this loop will + * terminate */ \ + for (i = h % hsize; ; i = (i + 1) % hsize) { \ struct ecl_hashtable_entry *e = hashtable->hash.data + i; \ cl_object hkey = e->key, hvalue = e->value; \ - if (hkey == OBJNULL) { \ - if (hvalue == OBJNULL) { \ - if (j == hsize) \ - return e; \ - else \ - return hashtable->hash.data + j; \ - } else { \ - if (j == hsize) \ - j = i; \ - else if (j == i) \ - return e; \ - } \ - continue; \ - } \ - if (HASH_TABLE_LOOP_TEST) return hashtable->hash.data + i; \ + (void)hvalue; /* silence unused-variable compiler warning */ \ + if (hkey == OBJNULL || (HASH_TABLE_LOOP_TEST)) return e; \ } \ - return hashtable->hash.data + j; \ } #define HASH_TABLE_SET(h,loop,compute_key,store_key) { \ @@ -318,6 +305,52 @@ AGAIN: \ return hashtable; \ } +/* HASH_TABLE_REMOVE tries to fills up holes generated by deleting + * entries from a hashtable as follows. Iterate through all entries f + * to the right of the deleted entry e (the hole). If the distance + * between f's current and its optimal location is greater than the + * distance between e and f, then we can put f into the hole. Repeat + * with the new hole at the location of f until the holes are all + * filled. */ + +#define HASH_TABLE_REMOVE(hkey,hvalue,h,HASH_TABLE_LOOP_TEST,compute_key) { \ + cl_index i, hsize = hashtable->hash.size; \ + /* INV: there is at least one empty bucket so that this loop will + * terminate */ \ + for (i = h % hsize; ; i = (i + 1) % hsize) { \ + struct ecl_hashtable_entry *e = hashtable->hash.data + i; \ + cl_object hkey = e->key, hvalue = e->value; \ + (void)hvalue; /* silence unused-variable compiler warning */ \ + if (hkey == OBJNULL) return 0; \ + if (HASH_TABLE_LOOP_TEST) { \ + cl_index j = (i+1) % hsize, k; \ + for (k = 1; k <= hsize; j = (j+1) % hsize, k++) { \ + struct ecl_hashtable_entry *f = hashtable->hash.data + j; \ + hkey = f->key; \ + hvalue = f->value; \ + if (hkey == OBJNULL) { \ + e->key = OBJNULL; \ + e->value = OBJNULL; \ + break; \ + } \ + cl_hashkey hf = compute_key; \ + cl_index m = hf % hsize; \ + /* d: distance of f from the optimal position */ \ + cl_index d = (j >= m) ? (j - m) : (j + hsize - m); \ + if (k <= d) { \ + e->key = hkey; \ + e->value = hvalue; \ + e = f; \ + i = j; \ + k = 0; \ + } \ + } \ + hashtable->hash.entries--; \ + return 1; \ + } \ + } \ +} + /* * EQ HASHTABLES */ @@ -338,27 +371,18 @@ _ecl_gethash_eq(cl_object key, cl_object hashtable, cl_object def) return (e->key == OBJNULL)? def : e->value; } -static bool -_ecl_remhash_eq(cl_object key, cl_object hashtable) -{ - cl_hashkey h = _hash_eq(key); - struct ecl_hashtable_entry *e = _ecl_hash_loop_eq(h, key, hashtable); - if (e->key == OBJNULL) { - return 0; - } else { - e->key = OBJNULL; - e->value = ECL_NIL; - hashtable->hash.entries--; - return 1; - } -} - static cl_object _ecl_sethash_eq(cl_object key, cl_object hashtable, cl_object value) { HASH_TABLE_SET(h, _ecl_hash_loop_eq, _hash_eq(key), key); } +static bool +_ecl_remhash_eq(cl_object key, cl_object hashtable) +{ + HASH_TABLE_REMOVE(hkey, hvalue, _hash_eq(key), key == hkey, _hash_eq(hkey)); +} + /* * EQL HASHTABLES */ @@ -386,16 +410,7 @@ _ecl_sethash_eql(cl_object key, cl_object hashtable, cl_object value) static bool _ecl_remhash_eql(cl_object key, cl_object hashtable) { - cl_hashkey h = _hash_eql(0, key); - struct ecl_hashtable_entry *e = _ecl_hash_loop_eql(h, key, hashtable); - if (e->key == OBJNULL) { - return 0; - } else { - e->key = OBJNULL; - e->value = ECL_NIL; - hashtable->hash.entries--; - return 1; - } + HASH_TABLE_REMOVE(hkey, hvalue, _hash_eql(0, key), ecl_eql(key, hkey), _hash_eql(0, hkey)); } /* @@ -425,16 +440,7 @@ _ecl_sethash_equal(cl_object key, cl_object hashtable, cl_object value) static bool _ecl_remhash_equal(cl_object key, cl_object hashtable) { - cl_hashkey h = _hash_equal(3, 0, key); - struct ecl_hashtable_entry *e = _ecl_hash_loop_equal(h, key, hashtable); - if (e->key == OBJNULL) { - return 0; - } else { - e->key = OBJNULL; - e->value = ECL_NIL; - hashtable->hash.entries--; - return 1; - } + HASH_TABLE_REMOVE(hkey, hvalue, _hash_equal(3, 0, key), ecl_equal(key, hkey), _hash_equal(3, 0, hkey)); } /* @@ -464,16 +470,7 @@ _ecl_sethash_equalp(cl_object key, cl_object hashtable, cl_object value) static bool _ecl_remhash_equalp(cl_object key, cl_object hashtable) { - cl_hashkey h = _hash_equalp(3, 0, key); - struct ecl_hashtable_entry *e = _ecl_hash_loop_equalp(h, key, hashtable); - if (e->key == OBJNULL) { - return 0; - } else { - e->key = OBJNULL; - e->value = ECL_NIL; - hashtable->hash.entries--; - return 1; - } + HASH_TABLE_REMOVE(hkey, hvalue, _hash_equalp(3, 0, key), ecl_equalp(key, hkey), _hash_equalp(3, 0, hkey)); } /* @@ -505,15 +502,8 @@ static bool _ecl_remhash_pack(cl_object key, cl_object hashtable) { cl_hashkey h = _hash_equal(3, 0, key); - struct ecl_hashtable_entry *e = _ecl_hash_loop_pack(h, key, hashtable); - if (e->key == OBJNULL) { - return 0; - } else { - e->key = OBJNULL; - e->value = ECL_NIL; - hashtable->hash.entries--; - return 1; - } + cl_object ho = ecl_make_fixnum(h & 0xFFFFFFF); + HASH_TABLE_REMOVE(hkey, hvalue, h, (ho==hkey) && ecl_string_eq(key,SYMBOL_NAME(hvalue)), _hash_equal(3, 0, SYMBOL_NAME(hvalue))); } /* @@ -549,16 +539,8 @@ _ecl_sethash_generic(cl_object key, cl_object hashtable, cl_object value) static bool _ecl_remhash_generic(cl_object key, cl_object hashtable) { - cl_hashkey h = _hash_generic(hashtable, key); - struct ecl_hashtable_entry *e = _ecl_hash_loop_generic(h, key, hashtable); - if (e->key == OBJNULL) { - return 0; - } else { - e->key = OBJNULL; - e->value = ECL_NIL; - hashtable->hash.entries--; - return 1; - } + cl_object test_fun = hashtable->hash.generic_test; + HASH_TABLE_REMOVE(hkey, hvalue, _hash_generic(hashtable, key), _ecl_generic_hash_test(test_fun, key, hkey), _hash_generic(hashtable, hkey)); } /* @@ -658,26 +640,12 @@ static struct ecl_hashtable_entry * _ecl_weak_hash_loop(cl_hashkey h, cl_object key, cl_object hashtable, struct ecl_hashtable_entry *aux) { - cl_index hsize = hashtable->hash.size; - cl_index i = h % hsize, j = hsize, k; - for (k = 0; k < hsize; i = (i + 1) % hsize, k++) { + cl_index i, hsize = hashtable->hash.size; + for (i = h % hsize; ; i = (i + 1) % hsize) { struct ecl_hashtable_entry *p = hashtable->hash.data + i; struct ecl_hashtable_entry e = *aux = copy_entry(p, hashtable); if (e.key == OBJNULL) { - if (e.value == OBJNULL) { - if (j == hsize) { - return p; - } else { - return hashtable->hash.data + j; - } - } else { - if (j == hsize) { - j = i; - } else if (j == i) { - return p; - } - } - continue; + return p; } switch (hashtable->hash.test) { case ecl_htt_eq: @@ -704,7 +672,6 @@ _ecl_weak_hash_loop(cl_hashkey h, cl_object key, cl_object hashtable, ecl_internal_error("Unknown hash test."); } } - return hashtable->hash.data + j; } static cl_object @@ -759,17 +726,75 @@ _ecl_sethash_weak(cl_object key, cl_object hashtable, cl_object value) static bool _ecl_remhash_weak(cl_object key, cl_object hashtable) { + cl_index i, hsize = hashtable->hash.size; cl_hashkey h = _ecl_hash_key(hashtable, key); - struct ecl_hashtable_entry aux[1]; - struct ecl_hashtable_entry *e = - _ecl_weak_hash_loop(h, key, hashtable, aux); - if (aux->key != OBJNULL) { - hashtable->hash.entries--; - e->key = OBJNULL; - e->value = ECL_NIL; - return 1; - } else { - return 0; + for (i = h % hsize; ; i = (i + 1) % hsize) { + struct ecl_hashtable_entry *p = hashtable->hash.data + i; + struct ecl_hashtable_entry e = copy_entry(p, hashtable); + if (e.key == OBJNULL) { + return 0; + } + bool found = FALSE; + switch (hashtable->hash.test) { + case ecl_htt_eq: + if (e.key == key) + found = TRUE; + break; + case ecl_htt_eql: + if (ecl_eql(e.key, key)) + found = TRUE; + break; + case ecl_htt_equal: + if (ecl_equal(e.key, key)) + found = TRUE; + break; + case ecl_htt_equalp: + if (ecl_equalp(e.key, key)) + found = TRUE; + break; + case ecl_htt_generic: + if (_ecl_generic_hash_test(hashtable->hash.generic_test, e.key, key)) + found = TRUE; + break; + default: + ecl_internal_error("Unknown hash test."); + } + if (found) { + cl_index j = (i+1) % hsize, k; + for (k = 1; k <= hsize; j = (j+1) % hsize, k++) { + struct ecl_hashtable_entry *q = hashtable->hash.data + j; + struct ecl_hashtable_entry f = copy_entry(q, hashtable); + if (f.key == OBJNULL) { + break; + } + cl_hashkey hf = _ecl_hash_key(hashtable, f.value); + cl_index m = hf % hsize; + cl_index d = (j >= m) ? (j - m) : (j + hsize - m); + if (k <= d) { + switch (hashtable->hash.weak) { + case ecl_htt_weak_key: + case ecl_htt_weak_key_and_value: + case ecl_htt_weak_key_or_value: + p->key = si_make_weak_pointer(f.key); + default: + p->key = f.key; + } + switch (hashtable->hash.weak) { + case ecl_htt_weak_value: + case ecl_htt_weak_key_and_value: + case ecl_htt_weak_key_or_value: + p->value = si_make_weak_pointer(f.value); + default: + p->value = f.value; + } + p = q; + i = j; + k = 0; + } + } + hashtable->hash.entries--; + return 1; + } } } #endif @@ -889,6 +914,9 @@ ecl_extend_hashtable(cl_object hashtable) new->hash.entries = 0; new->hash.size = new_size; new->hash.limit = new->hash.size * new->hash.factor; + if (new->hash.limit >= new_size) { + new->hash.limit = new_size - 1; + } new->hash.data = (struct ecl_hashtable_entry *) ecl_alloc(new_size * sizeof(struct ecl_hashtable_entry)); for (i = 0; i < new_size; i++) { @@ -1089,6 +1117,9 @@ cl__make_hash_table(cl_object test, cl_object size, cl_object rehash_size, rehash_threshold = cl_max(2, min_threshold, rehash_threshold); h->hash.factor = ecl_to_double(rehash_threshold); h->hash.limit = h->hash.size * h->hash.factor; + if (h->hash.limit >= hsize) { + h->hash.limit = hsize - 1; + } h->hash.data = NULL; /* for GC sake */ h->hash.data = (struct ecl_hashtable_entry *) ecl_alloc(hsize * sizeof(struct ecl_hashtable_entry)); @@ -1281,27 +1312,54 @@ cl_hash_table_count(cl_object ht) @(return (ecl_make_fixnum(ecl_hash_table_count(ht)))); } + +/* HASH TABLE ITERATION + * + * We iterate from right to left across each group of consecutive + * non-empty buckets. This allows removing the current iteration + * element without iterating over elements twice or missing elements + * because we only change elements to the right of the current + * element when removing an element. For example, a hashtable of + * size 10 with 5 filled buckets is iterated over as follows: + * + * a..bc...de + * ^ + * a..bc...de + * ^ + * a..bc...de + * ^ + * a..bc...de + * ^ + * a..bc...de + * ^ + * + * If for example the element `e` is removed and the element `a` moves + * up at the place that `e` previously occupied, we don't iterate + * twice over `a`. + */ + static cl_object si_hash_table_iterate(cl_narg narg, ...) { const cl_env_ptr the_env = ecl_process_env(); cl_object env = the_env->function->cclosure.env; cl_object index = CAR(env); - cl_object ht = CADR(env); - cl_fixnum i; + cl_object endpoint = CADR(env); + cl_object ht = CADDR(env); + cl_fixnum i, j; if (!Null(index)) { - i = ecl_fixnum(index); - if (i < 0) - i = -1; - for (; ++i < ht->hash.size; ) { + i = ecl_fixnum(endpoint); + j = ecl_fixnum(index); + do { + j = (j == 0) ? ht->hash.size-1 : j - 1; struct ecl_hashtable_entry e = - copy_entry(ht->hash.data + i, ht); + copy_entry(ht->hash.data + j, ht); if (e.key != OBJNULL) { - cl_object ndx = ecl_make_fixnum(i); + cl_object ndx = ecl_make_fixnum(j); ECL_RPLACA(env, ndx); @(return ndx e.key e.value); } - } + } while (j != i); ECL_RPLACA(env, ECL_NIL); } @(return ECL_NIL); @@ -1310,9 +1368,13 @@ si_hash_table_iterate(cl_narg narg, ...) cl_object si_hash_table_iterator(cl_object ht) { + cl_fixnum i; assert_type_hash_table(@[si::hash-table-iterator], 1, ht); + /* Make sure we don't start in the middle of a group of consecutive + * filled buckets. */ + for (i = ht->hash.size-1; ht->hash.data[i].key != OBJNULL; i--); @(return ecl_make_cclosure_va(si_hash_table_iterate, - cl_list(2, ecl_make_fixnum(-1), ht), + cl_list(3, ecl_make_fixnum(i), ecl_make_fixnum(i), ht), @'si::hash-table-iterator', 0)); } @@ -1372,12 +1434,21 @@ cl_sxhash(cl_object key) cl_object cl_maphash(cl_object fun, cl_object ht) { - cl_index i; + cl_index i, j, hsize; assert_type_hash_table(@[maphash], 2, ht); - for (i = 0; i < ht->hash.size; i++) { - struct ecl_hashtable_entry e = ht->hash.data[i]; - if(e.key != OBJNULL) { + if (ht->hash.entries == 0) { + @(return ECL_NIL); + } + hsize = ht->hash.size; + /* Make sure we don't start in the middle of a group of consecutive + * filled buckets. */ + for (i = hsize-1; ht->hash.data[i].key != OBJNULL; i--); + j = i; + do { + j = (j == 0) ? hsize-1 : j - 1; + struct ecl_hashtable_entry e = ht->hash.data[j]; + if (e.key != OBJNULL) { cl_object key = e.key; cl_object val = e.value; switch (ht->hash.weak) { @@ -1394,7 +1465,7 @@ cl_maphash(cl_object fun, cl_object ht) } funcall(3, fun, key, val); } - } + } while (j != i); @(return ECL_NIL); }