mirror of
git://git.sv.gnu.org/emacs.git
synced 2025-12-15 10:30:25 -08:00
Change hash range reduction from remainder to multiplication
This makes both lookups and rehashing cheaper. The index vector size is now always a power of 2. The first table size is reduced to 6 (from 8), because index vectors would become excessively big otherwise. * src/lisp.h (struct Lisp_Hash_Table): Replace index_size with index_bits. All references adapted. (hash_table_index_size): New accessor; use it where applicable. * src/fns.c (hash_index_size): Replace with... (compute_hash_index_bits): ...this new function, returning the log2 of the index size. All callers adapted. (hash_index_index): Knuth multiplicative hashing instead of remainder. (maybe_resize_hash_table): Reduce first table size from 8 to 6.
This commit is contained in:
parent
f6225d125c
commit
e66870400d
4 changed files with 53 additions and 45 deletions
|
|
@ -3443,7 +3443,7 @@ cleanup_vector (struct Lisp_Vector *vector)
|
|||
struct Lisp_Hash_Table *h = PSEUDOVEC_STRUCT (vector, Lisp_Hash_Table);
|
||||
if (h->table_size > 0)
|
||||
{
|
||||
eassert (h->index_size > 1);
|
||||
eassert (h->index_bits > 0);
|
||||
xfree (h->index);
|
||||
xfree (h->key_and_value);
|
||||
xfree (h->next);
|
||||
|
|
@ -3451,7 +3451,7 @@ cleanup_vector (struct Lisp_Vector *vector)
|
|||
ptrdiff_t bytes = (h->table_size * (2 * sizeof *h->key_and_value
|
||||
+ sizeof *h->hash
|
||||
+ sizeof *h->next)
|
||||
+ h->index_size * sizeof *h->index);
|
||||
+ hash_table_index_size (h) * sizeof *h->index);
|
||||
hash_table_allocated_bytes -= bytes;
|
||||
}
|
||||
}
|
||||
|
|
@ -5959,7 +5959,8 @@ purecopy_hash_table (struct Lisp_Hash_Table *table)
|
|||
for (ptrdiff_t i = 0; i < nvalues; i++)
|
||||
pure->key_and_value[i] = purecopy (table->key_and_value[i]);
|
||||
|
||||
ptrdiff_t index_bytes = table->index_size * sizeof *table->index;
|
||||
ptrdiff_t index_bytes = hash_table_index_size (table)
|
||||
* sizeof *table->index;
|
||||
pure->index = pure_alloc (index_bytes, -(int)sizeof *table->index);
|
||||
memcpy (pure->index, table->index, index_bytes);
|
||||
}
|
||||
|
|
|
|||
76
src/fns.c
76
src/fns.c
|
|
@ -4291,7 +4291,7 @@ set_hash_hash_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, hash_hash_t val)
|
|||
static void
|
||||
set_hash_index_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, ptrdiff_t val)
|
||||
{
|
||||
eassert (idx >= 0 && idx < h->index_size);
|
||||
eassert (idx >= 0 && idx < hash_table_index_size (h));
|
||||
h->index[idx] = val;
|
||||
}
|
||||
|
||||
|
|
@ -4392,7 +4392,7 @@ HASH_NEXT (struct Lisp_Hash_Table *h, ptrdiff_t idx)
|
|||
static ptrdiff_t
|
||||
HASH_INDEX (struct Lisp_Hash_Table *h, ptrdiff_t idx)
|
||||
{
|
||||
eassert (idx >= 0 && idx < h->index_size);
|
||||
eassert (idx >= 0 && idx < hash_table_index_size (h));
|
||||
return h->index[idx];
|
||||
}
|
||||
|
||||
|
|
@ -4527,26 +4527,19 @@ allocate_hash_table (void)
|
|||
return ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_Hash_Table, PVEC_HASH_TABLE);
|
||||
}
|
||||
|
||||
/* Compute the size of the index from the table capacity. */
|
||||
static ptrdiff_t
|
||||
hash_index_size (ptrdiff_t size)
|
||||
/* Compute the size of the index (as log2) from the table capacity. */
|
||||
static int
|
||||
compute_hash_index_bits (hash_idx_t size)
|
||||
{
|
||||
/* An upper bound on the size of a hash table index. It must fit in
|
||||
ptrdiff_t and be a valid Emacs fixnum. */
|
||||
ptrdiff_t upper_bound = min (MOST_POSITIVE_FIXNUM,
|
||||
min (TYPE_MAXIMUM (hash_idx_t),
|
||||
PTRDIFF_MAX / sizeof (ptrdiff_t)));
|
||||
/* Single-element index vectors are used iff size=0. */
|
||||
eassert (size > 0);
|
||||
ptrdiff_t lower_bound = 2;
|
||||
ptrdiff_t index_size = size + max (size >> 2, 1); /* 1.25x larger */
|
||||
if (index_size < upper_bound)
|
||||
index_size = (index_size < lower_bound
|
||||
? lower_bound
|
||||
: next_almost_prime (index_size));
|
||||
if (index_size > upper_bound)
|
||||
/* An upper bound on the size of a hash table index index. */
|
||||
hash_idx_t upper_bound = min (MOST_POSITIVE_FIXNUM,
|
||||
min (TYPE_MAXIMUM (hash_idx_t),
|
||||
PTRDIFF_MAX / sizeof (hash_idx_t)));
|
||||
/* Use next higher power of 2. This works even for size=0. */
|
||||
int bits = elogb (size) + 1;
|
||||
if (bits >= TYPE_WIDTH (uintmax_t) || ((uintmax_t)1 << bits) > upper_bound)
|
||||
error ("Hash table too large");
|
||||
return index_size;
|
||||
return bits;
|
||||
}
|
||||
|
||||
/* Constant hash index vector used when the table size is zero.
|
||||
|
|
@ -4587,7 +4580,7 @@ make_hash_table (const struct hash_table_test *test, EMACS_INT size,
|
|||
h->key_and_value = NULL;
|
||||
h->hash = NULL;
|
||||
h->next = NULL;
|
||||
h->index_size = 1;
|
||||
h->index_bits = 0;
|
||||
h->index = (hash_idx_t *)empty_hash_index_vector;
|
||||
h->next_free = -1;
|
||||
}
|
||||
|
|
@ -4605,8 +4598,9 @@ make_hash_table (const struct hash_table_test *test, EMACS_INT size,
|
|||
h->next[i] = i + 1;
|
||||
h->next[size - 1] = -1;
|
||||
|
||||
int index_size = hash_index_size (size);
|
||||
h->index_size = index_size;
|
||||
int index_bits = compute_hash_index_bits (size);
|
||||
h->index_bits = index_bits;
|
||||
ptrdiff_t index_size = hash_table_index_size (h);
|
||||
h->index = hash_table_alloc_bytes (index_size * sizeof *h->index);
|
||||
for (ptrdiff_t i = 0; i < index_size; i++)
|
||||
h->index[i] = -1;
|
||||
|
|
@ -4654,7 +4648,7 @@ copy_hash_table (struct Lisp_Hash_Table *h1)
|
|||
h2->next = hash_table_alloc_bytes (next_bytes);
|
||||
memcpy (h2->next, h1->next, next_bytes);
|
||||
|
||||
ptrdiff_t index_bytes = h1->index_size * sizeof *h1->index;
|
||||
ptrdiff_t index_bytes = hash_table_index_size (h1) * sizeof *h1->index;
|
||||
h2->index = hash_table_alloc_bytes (index_bytes);
|
||||
memcpy (h2->index, h1->index, index_bytes);
|
||||
}
|
||||
|
|
@ -4668,8 +4662,11 @@ copy_hash_table (struct Lisp_Hash_Table *h1)
|
|||
static inline ptrdiff_t
|
||||
hash_index_index (struct Lisp_Hash_Table *h, hash_hash_t hash)
|
||||
{
|
||||
eassert (h->index_size > 0);
|
||||
return hash % h->index_size;
|
||||
/* Knuth multiplicative hashing, tailored for 32-bit indices
|
||||
(avoiding a 64-bit multiply). */
|
||||
uint32_t alpha = 2654435769; /* 2**32/phi */
|
||||
/* Note the cast to uint64_t, to make it work for index_bits=0. */
|
||||
return (uint64_t)((uint32_t)hash * alpha) >> (32 - h->index_bits);
|
||||
}
|
||||
|
||||
/* Resize hash table H if it's too full. If H cannot be resized
|
||||
|
|
@ -4681,7 +4678,7 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
|
|||
if (h->next_free < 0)
|
||||
{
|
||||
ptrdiff_t old_size = HASH_TABLE_SIZE (h);
|
||||
ptrdiff_t min_size = 8;
|
||||
ptrdiff_t min_size = 6;
|
||||
ptrdiff_t base_size = min (max (old_size, min_size), PTRDIFF_MAX / 2);
|
||||
/* Grow aggressively at small sizes, then just double. */
|
||||
ptrdiff_t new_size =
|
||||
|
|
@ -4706,13 +4703,14 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
|
|||
hash_hash_t *hash = hash_table_alloc_bytes (new_size * sizeof *hash);
|
||||
memcpy (hash, h->hash, old_size * sizeof *hash);
|
||||
|
||||
ptrdiff_t old_index_size = h->index_size;
|
||||
ptrdiff_t index_size = hash_index_size (new_size);
|
||||
ptrdiff_t old_index_size = hash_table_index_size (h);
|
||||
ptrdiff_t index_bits = compute_hash_index_bits (new_size);
|
||||
ptrdiff_t index_size = (ptrdiff_t)1 << index_bits;
|
||||
hash_idx_t *index = hash_table_alloc_bytes (index_size * sizeof *index);
|
||||
for (ptrdiff_t i = 0; i < index_size; i++)
|
||||
index[i] = -1;
|
||||
|
||||
h->index_size = index_size;
|
||||
h->index_bits = index_bits;
|
||||
h->table_size = new_size;
|
||||
h->next_free = old_size;
|
||||
|
||||
|
|
@ -4778,18 +4776,19 @@ hash_table_thaw (Lisp_Object hash_table)
|
|||
h->key_and_value = NULL;
|
||||
h->hash = NULL;
|
||||
h->next = NULL;
|
||||
h->index_size = 1;
|
||||
h->index_bits = 0;
|
||||
h->index = (hash_idx_t *)empty_hash_index_vector;
|
||||
}
|
||||
else
|
||||
{
|
||||
ptrdiff_t index_size = hash_index_size (size);
|
||||
h->index_size = index_size;
|
||||
ptrdiff_t index_bits = compute_hash_index_bits (size);
|
||||
h->index_bits = index_bits;
|
||||
|
||||
h->hash = hash_table_alloc_bytes (size * sizeof *h->hash);
|
||||
|
||||
h->next = hash_table_alloc_bytes (size * sizeof *h->next);
|
||||
|
||||
ptrdiff_t index_size = hash_table_index_size (h);
|
||||
h->index = hash_table_alloc_bytes (index_size * sizeof *h->index);
|
||||
for (ptrdiff_t i = 0; i < index_size; i++)
|
||||
h->index[i] = -1;
|
||||
|
|
@ -4937,7 +4936,8 @@ hash_clear (struct Lisp_Hash_Table *h)
|
|||
set_hash_value_slot (h, i, Qnil);
|
||||
}
|
||||
|
||||
for (ptrdiff_t i = 0; i < h->index_size; i++)
|
||||
ptrdiff_t index_size = hash_table_index_size (h);
|
||||
for (ptrdiff_t i = 0; i < index_size; i++)
|
||||
h->index[i] = -1;
|
||||
|
||||
h->next_free = 0;
|
||||
|
|
@ -4976,7 +4976,7 @@ keep_entry_p (hash_table_weakness_t weakness,
|
|||
bool
|
||||
sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p)
|
||||
{
|
||||
ptrdiff_t n = h->index_size;
|
||||
ptrdiff_t n = hash_table_index_size (h);
|
||||
bool marked = false;
|
||||
|
||||
for (ptrdiff_t bucket = 0; bucket < n; ++bucket)
|
||||
|
|
@ -5701,7 +5701,7 @@ DEFUN ("internal--hash-table-histogram",
|
|||
struct Lisp_Hash_Table *h = check_hash_table (hash_table);
|
||||
ptrdiff_t size = HASH_TABLE_SIZE (h);
|
||||
ptrdiff_t *freq = xzalloc (size * sizeof *freq);
|
||||
ptrdiff_t index_size = h->index_size;
|
||||
ptrdiff_t index_size = hash_table_index_size (h);
|
||||
for (ptrdiff_t i = 0; i < index_size; i++)
|
||||
{
|
||||
ptrdiff_t n = 0;
|
||||
|
|
@ -5729,7 +5729,7 @@ Internal use only. */)
|
|||
{
|
||||
struct Lisp_Hash_Table *h = check_hash_table (hash_table);
|
||||
Lisp_Object ret = Qnil;
|
||||
ptrdiff_t index_size = h->index_size;
|
||||
ptrdiff_t index_size = hash_table_index_size (h);
|
||||
for (ptrdiff_t i = 0; i < index_size; i++)
|
||||
{
|
||||
Lisp_Object bucket = Qnil;
|
||||
|
|
@ -5750,7 +5750,7 @@ DEFUN ("internal--hash-table-index-size",
|
|||
(Lisp_Object hash_table)
|
||||
{
|
||||
struct Lisp_Hash_Table *h = check_hash_table (hash_table);
|
||||
return make_int (h->index_size);
|
||||
return make_int (hash_table_index_size (h));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
13
src/lisp.h
13
src/lisp.h
|
|
@ -2475,14 +2475,14 @@ struct Lisp_Hash_Table
|
|||
The table is physically split into three vectors (hash, next,
|
||||
key_and_value) which may or may not be beneficial. */
|
||||
|
||||
hash_idx_t index_size; /* Size of the index vector. */
|
||||
int index_bits; /* log2 (size of the index vector). */
|
||||
hash_idx_t table_size; /* Size of the next and hash vectors. */
|
||||
|
||||
/* Bucket vector. An entry of -1 indicates no item is present,
|
||||
and a nonnegative entry is the index of the first item in
|
||||
a collision chain.
|
||||
This vector is index_size entries long.
|
||||
If index_size is 1 (and table_size is 0), then this is the
|
||||
This vector is 2**index_bits entries long.
|
||||
If index_bits is 0 (and table_size is 0), then this is the
|
||||
constant read-only vector {-1}, shared between all instances.
|
||||
Otherwise it is heap-allocated. */
|
||||
hash_idx_t *index;
|
||||
|
|
@ -2597,6 +2597,13 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h)
|
|||
return h->table_size;
|
||||
}
|
||||
|
||||
/* Size of the index vector in hash table H. */
|
||||
INLINE ptrdiff_t
|
||||
hash_table_index_size (const struct Lisp_Hash_Table *h)
|
||||
{
|
||||
return (ptrdiff_t)1 << h->index_bits;
|
||||
}
|
||||
|
||||
/* Hash value for KEY in hash table H. */
|
||||
INLINE hash_hash_t
|
||||
hash_from_key (struct Lisp_Hash_Table *h, Lisp_Object key)
|
||||
|
|
|
|||
|
|
@ -2688,7 +2688,7 @@ hash_table_freeze (struct Lisp_Hash_Table *h)
|
|||
h->hash = NULL;
|
||||
h->index = NULL;
|
||||
h->table_size = 0;
|
||||
h->index_size = 0;
|
||||
h->index_bits = 0;
|
||||
h->frozen_test = hash_table_std_test (h->test);
|
||||
h->test = NULL;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue