1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2025-12-15 10:30:25 -08:00

Change hash range reduction from remainder to multiplication

This makes both lookups and rehashing cheaper.  The index vector size
is now always a power of 2.  The first table size is reduced to
6 (from 8), because index vectors would become excessively big
otherwise.

* src/lisp.h (struct Lisp_Hash_Table): Replace index_size with
index_bits.  All references adapted.
(hash_table_index_size): New accessor; use it where applicable.
* src/fns.c (hash_index_size): Replace with...
(compute_hash_index_bits): ...this new function, returning the log2 of the
index size.  All callers adapted.
(hash_index_index): Knuth multiplicative hashing instead of remainder.
(maybe_resize_hash_table): Reduce first table size from 8 to 6.
This commit is contained in:
Mattias Engdegård 2024-01-15 09:25:02 +01:00
parent f6225d125c
commit e66870400d
4 changed files with 53 additions and 45 deletions

View file

@ -3443,7 +3443,7 @@ cleanup_vector (struct Lisp_Vector *vector)
struct Lisp_Hash_Table *h = PSEUDOVEC_STRUCT (vector, Lisp_Hash_Table);
if (h->table_size > 0)
{
eassert (h->index_size > 1);
eassert (h->index_bits > 0);
xfree (h->index);
xfree (h->key_and_value);
xfree (h->next);
@ -3451,7 +3451,7 @@ cleanup_vector (struct Lisp_Vector *vector)
ptrdiff_t bytes = (h->table_size * (2 * sizeof *h->key_and_value
+ sizeof *h->hash
+ sizeof *h->next)
+ h->index_size * sizeof *h->index);
+ hash_table_index_size (h) * sizeof *h->index);
hash_table_allocated_bytes -= bytes;
}
}
@ -5959,7 +5959,8 @@ purecopy_hash_table (struct Lisp_Hash_Table *table)
for (ptrdiff_t i = 0; i < nvalues; i++)
pure->key_and_value[i] = purecopy (table->key_and_value[i]);
ptrdiff_t index_bytes = table->index_size * sizeof *table->index;
ptrdiff_t index_bytes = hash_table_index_size (table)
* sizeof *table->index;
pure->index = pure_alloc (index_bytes, -(int)sizeof *table->index);
memcpy (pure->index, table->index, index_bytes);
}

View file

@ -4291,7 +4291,7 @@ set_hash_hash_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, hash_hash_t val)
static void
set_hash_index_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, ptrdiff_t val)
{
eassert (idx >= 0 && idx < h->index_size);
eassert (idx >= 0 && idx < hash_table_index_size (h));
h->index[idx] = val;
}
@ -4392,7 +4392,7 @@ HASH_NEXT (struct Lisp_Hash_Table *h, ptrdiff_t idx)
static ptrdiff_t
HASH_INDEX (struct Lisp_Hash_Table *h, ptrdiff_t idx)
{
eassert (idx >= 0 && idx < h->index_size);
eassert (idx >= 0 && idx < hash_table_index_size (h));
return h->index[idx];
}
@ -4527,26 +4527,19 @@ allocate_hash_table (void)
return ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_Hash_Table, PVEC_HASH_TABLE);
}
/* Compute the size of the index from the table capacity. */
static ptrdiff_t
hash_index_size (ptrdiff_t size)
/* Compute the size of the index (as log2) from the table capacity. */
static int
compute_hash_index_bits (hash_idx_t size)
{
/* An upper bound on the size of a hash table index. It must fit in
ptrdiff_t and be a valid Emacs fixnum. */
ptrdiff_t upper_bound = min (MOST_POSITIVE_FIXNUM,
min (TYPE_MAXIMUM (hash_idx_t),
PTRDIFF_MAX / sizeof (ptrdiff_t)));
/* Single-element index vectors are used iff size=0. */
eassert (size > 0);
ptrdiff_t lower_bound = 2;
ptrdiff_t index_size = size + max (size >> 2, 1); /* 1.25x larger */
if (index_size < upper_bound)
index_size = (index_size < lower_bound
? lower_bound
: next_almost_prime (index_size));
if (index_size > upper_bound)
/* An upper bound on the size of a hash table index index. */
hash_idx_t upper_bound = min (MOST_POSITIVE_FIXNUM,
min (TYPE_MAXIMUM (hash_idx_t),
PTRDIFF_MAX / sizeof (hash_idx_t)));
/* Use next higher power of 2. This works even for size=0. */
int bits = elogb (size) + 1;
if (bits >= TYPE_WIDTH (uintmax_t) || ((uintmax_t)1 << bits) > upper_bound)
error ("Hash table too large");
return index_size;
return bits;
}
/* Constant hash index vector used when the table size is zero.
@ -4587,7 +4580,7 @@ make_hash_table (const struct hash_table_test *test, EMACS_INT size,
h->key_and_value = NULL;
h->hash = NULL;
h->next = NULL;
h->index_size = 1;
h->index_bits = 0;
h->index = (hash_idx_t *)empty_hash_index_vector;
h->next_free = -1;
}
@ -4605,8 +4598,9 @@ make_hash_table (const struct hash_table_test *test, EMACS_INT size,
h->next[i] = i + 1;
h->next[size - 1] = -1;
int index_size = hash_index_size (size);
h->index_size = index_size;
int index_bits = compute_hash_index_bits (size);
h->index_bits = index_bits;
ptrdiff_t index_size = hash_table_index_size (h);
h->index = hash_table_alloc_bytes (index_size * sizeof *h->index);
for (ptrdiff_t i = 0; i < index_size; i++)
h->index[i] = -1;
@ -4654,7 +4648,7 @@ copy_hash_table (struct Lisp_Hash_Table *h1)
h2->next = hash_table_alloc_bytes (next_bytes);
memcpy (h2->next, h1->next, next_bytes);
ptrdiff_t index_bytes = h1->index_size * sizeof *h1->index;
ptrdiff_t index_bytes = hash_table_index_size (h1) * sizeof *h1->index;
h2->index = hash_table_alloc_bytes (index_bytes);
memcpy (h2->index, h1->index, index_bytes);
}
@ -4668,8 +4662,11 @@ copy_hash_table (struct Lisp_Hash_Table *h1)
static inline ptrdiff_t
hash_index_index (struct Lisp_Hash_Table *h, hash_hash_t hash)
{
eassert (h->index_size > 0);
return hash % h->index_size;
/* Knuth multiplicative hashing, tailored for 32-bit indices
(avoiding a 64-bit multiply). */
uint32_t alpha = 2654435769; /* 2**32/phi */
/* Note the cast to uint64_t, to make it work for index_bits=0. */
return (uint64_t)((uint32_t)hash * alpha) >> (32 - h->index_bits);
}
/* Resize hash table H if it's too full. If H cannot be resized
@ -4681,7 +4678,7 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
if (h->next_free < 0)
{
ptrdiff_t old_size = HASH_TABLE_SIZE (h);
ptrdiff_t min_size = 8;
ptrdiff_t min_size = 6;
ptrdiff_t base_size = min (max (old_size, min_size), PTRDIFF_MAX / 2);
/* Grow aggressively at small sizes, then just double. */
ptrdiff_t new_size =
@ -4706,13 +4703,14 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
hash_hash_t *hash = hash_table_alloc_bytes (new_size * sizeof *hash);
memcpy (hash, h->hash, old_size * sizeof *hash);
ptrdiff_t old_index_size = h->index_size;
ptrdiff_t index_size = hash_index_size (new_size);
ptrdiff_t old_index_size = hash_table_index_size (h);
ptrdiff_t index_bits = compute_hash_index_bits (new_size);
ptrdiff_t index_size = (ptrdiff_t)1 << index_bits;
hash_idx_t *index = hash_table_alloc_bytes (index_size * sizeof *index);
for (ptrdiff_t i = 0; i < index_size; i++)
index[i] = -1;
h->index_size = index_size;
h->index_bits = index_bits;
h->table_size = new_size;
h->next_free = old_size;
@ -4778,18 +4776,19 @@ hash_table_thaw (Lisp_Object hash_table)
h->key_and_value = NULL;
h->hash = NULL;
h->next = NULL;
h->index_size = 1;
h->index_bits = 0;
h->index = (hash_idx_t *)empty_hash_index_vector;
}
else
{
ptrdiff_t index_size = hash_index_size (size);
h->index_size = index_size;
ptrdiff_t index_bits = compute_hash_index_bits (size);
h->index_bits = index_bits;
h->hash = hash_table_alloc_bytes (size * sizeof *h->hash);
h->next = hash_table_alloc_bytes (size * sizeof *h->next);
ptrdiff_t index_size = hash_table_index_size (h);
h->index = hash_table_alloc_bytes (index_size * sizeof *h->index);
for (ptrdiff_t i = 0; i < index_size; i++)
h->index[i] = -1;
@ -4937,7 +4936,8 @@ hash_clear (struct Lisp_Hash_Table *h)
set_hash_value_slot (h, i, Qnil);
}
for (ptrdiff_t i = 0; i < h->index_size; i++)
ptrdiff_t index_size = hash_table_index_size (h);
for (ptrdiff_t i = 0; i < index_size; i++)
h->index[i] = -1;
h->next_free = 0;
@ -4976,7 +4976,7 @@ keep_entry_p (hash_table_weakness_t weakness,
bool
sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p)
{
ptrdiff_t n = h->index_size;
ptrdiff_t n = hash_table_index_size (h);
bool marked = false;
for (ptrdiff_t bucket = 0; bucket < n; ++bucket)
@ -5701,7 +5701,7 @@ DEFUN ("internal--hash-table-histogram",
struct Lisp_Hash_Table *h = check_hash_table (hash_table);
ptrdiff_t size = HASH_TABLE_SIZE (h);
ptrdiff_t *freq = xzalloc (size * sizeof *freq);
ptrdiff_t index_size = h->index_size;
ptrdiff_t index_size = hash_table_index_size (h);
for (ptrdiff_t i = 0; i < index_size; i++)
{
ptrdiff_t n = 0;
@ -5729,7 +5729,7 @@ Internal use only. */)
{
struct Lisp_Hash_Table *h = check_hash_table (hash_table);
Lisp_Object ret = Qnil;
ptrdiff_t index_size = h->index_size;
ptrdiff_t index_size = hash_table_index_size (h);
for (ptrdiff_t i = 0; i < index_size; i++)
{
Lisp_Object bucket = Qnil;
@ -5750,7 +5750,7 @@ DEFUN ("internal--hash-table-index-size",
(Lisp_Object hash_table)
{
struct Lisp_Hash_Table *h = check_hash_table (hash_table);
return make_int (h->index_size);
return make_int (hash_table_index_size (h));
}

View file

@ -2475,14 +2475,14 @@ struct Lisp_Hash_Table
The table is physically split into three vectors (hash, next,
key_and_value) which may or may not be beneficial. */
hash_idx_t index_size; /* Size of the index vector. */
int index_bits; /* log2 (size of the index vector). */
hash_idx_t table_size; /* Size of the next and hash vectors. */
/* Bucket vector. An entry of -1 indicates no item is present,
and a nonnegative entry is the index of the first item in
a collision chain.
This vector is index_size entries long.
If index_size is 1 (and table_size is 0), then this is the
This vector is 2**index_bits entries long.
If index_bits is 0 (and table_size is 0), then this is the
constant read-only vector {-1}, shared between all instances.
Otherwise it is heap-allocated. */
hash_idx_t *index;
@ -2597,6 +2597,13 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h)
return h->table_size;
}
/* Size of the index vector in hash table H. */
INLINE ptrdiff_t
hash_table_index_size (const struct Lisp_Hash_Table *h)
{
return (ptrdiff_t)1 << h->index_bits;
}
/* Hash value for KEY in hash table H. */
INLINE hash_hash_t
hash_from_key (struct Lisp_Hash_Table *h, Lisp_Object key)

View file

@ -2688,7 +2688,7 @@ hash_table_freeze (struct Lisp_Hash_Table *h)
h->hash = NULL;
h->index = NULL;
h->table_size = 0;
h->index_size = 0;
h->index_bits = 0;
h->frozen_test = hash_table_std_test (h->test);
h->test = NULL;
}