1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2025-12-15 10:30:25 -08:00

Leaner hash table dumping and thawing

Only dump the actual data, and the test encoded as an enum.  This
simplifies dumping, makes dump files smaller and saves space at run
time.

* src/lisp.h (hash_table_std_test_t): New enum.
(struct Lisp_Hash_Table): Add frozen_test member, consuming no extra space.
* src/fns.c (hashfn_user_defined): Now static.
(hash_table_test_from_std): New.
(hash_table_rehash): Rename to...
(hash_table_thaw): ...this and rewrite.
* src/pdumper.c (hash_table_contents): Only include actual data, not
unused space.
(hash_table_std_test): New.
(hash_table_freeze): Set frozen_test from test.
(dump_hash_table): Dump frozen_test, not the whole test struct.
Don't bother other dumping fields that can be derived.
This commit is contained in:
Mattias Engdegård 2023-11-04 16:34:09 +01:00
parent c3d0cc50fa
commit d3cefd3e98
3 changed files with 72 additions and 60 deletions

View file

@ -4474,7 +4474,7 @@ hashfn_eql (Lisp_Object key, struct Lisp_Hash_Table *h)
/* Given H, return a hash code for KEY which uses a user-defined
function to compare keys. */
Lisp_Object
static Lisp_Object
hashfn_user_defined (Lisp_Object key, struct Lisp_Hash_Table *h)
{
Lisp_Object args[] = { h->test.user_hash_function, key };
@ -4638,11 +4638,10 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
if (h->next_free < 0)
{
ptrdiff_t old_size = HASH_TABLE_SIZE (h);
EMACS_INT new_size;
double float_new_size = old_size * std_rehash_size;
if (float_new_size < EMACS_INT_MAX)
new_size = float_new_size;
/* FIXME: better growth management, ditch std_rehash_size */
EMACS_INT new_size = old_size * std_rehash_size;
if (new_size < EMACS_INT_MAX)
new_size = max (new_size, 32); /* avoid slow initial growth */
else
new_size = EMACS_INT_MAX;
if (PTRDIFF_MAX < new_size)
@ -4691,20 +4690,39 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
}
}
/* Recompute the hashes (and hence also the "next" pointers).
Normally there's never a need to recompute hashes.
This is done only on first access to a hash-table loaded from
the "pdump", because the objects' addresses may have changed, thus
affecting their hashes. */
void
hash_table_rehash (Lisp_Object hash)
static const struct hash_table_test *
hash_table_test_from_std (hash_table_std_test_t test)
{
struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
ptrdiff_t i, count = h->count;
switch (test)
{
case Test_eq: return &hashtest_eq;
case Test_eql: return &hashtest_eql;
case Test_equal: return &hashtest_equal;
}
emacs_abort();
}
/* Rebuild a hash table from its frozen (dumped) form. */
void
hash_table_thaw (Lisp_Object hash_table)
{
struct Lisp_Hash_Table *h = XHASH_TABLE (hash_table);
/* Freezing discarded most non-essential information; recompute it.
The allocation is minimal with no room for growth. */
h->test = *hash_table_test_from_std (h->frozen_test);
ptrdiff_t size = ASIZE (h->key_and_value) / 2;
h->count = size;
ptrdiff_t index_size = hash_index_size (size);
h->next_free = -1;
h->hash = make_nil_vector (size);
h->next = make_vector (size, make_fixnum (-1));
h->index = make_vector (index_size, make_fixnum (-1));
/* Recompute the actual hash codes for each entry in the table.
Order is still invalid. */
for (i = 0; i < count; i++)
for (ptrdiff_t i = 0; i < size; i++)
{
Lisp_Object key = HASH_KEY (h, i);
Lisp_Object hash_code = hash_from_key (h, key);
@ -4712,12 +4730,7 @@ hash_table_rehash (Lisp_Object hash)
set_hash_hash_slot (h, i, hash_code);
set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
set_hash_index_slot (h, start_of_bucket, i);
eassert (HASH_NEXT (h, i) != i); /* Stop loops. */
}
ptrdiff_t size = ASIZE (h->next);
for (; i + 1 < size; i++)
set_hash_next_slot (h, i, i + 1);
}
/* Lookup KEY in hash table H. If HASH is non-null, return in *HASH

View file

@ -2385,6 +2385,12 @@ INLINE int
struct Lisp_Hash_Table;
typedef enum {
Test_eql,
Test_eq,
Test_equal,
} hash_table_std_test_t;
struct hash_table_test
{
/* Function used to compare keys; always a bare symbol. */
@ -2473,6 +2479,9 @@ struct Lisp_Hash_Table
/* Weakness of the table. */
hash_table_weakness_t weakness : 8;
/* Hash table test (only used when frozen in dump) */
hash_table_std_test_t frozen_test : 8;
/* True if the table can be purecopied. The table cannot be
changed afterwards. */
bool purecopy;
@ -2563,7 +2572,7 @@ hash_from_key (struct Lisp_Hash_Table *h, Lisp_Object key)
return h->test.hashfn (key, h);
}
void hash_table_rehash (Lisp_Object);
void hash_table_thaw (Lisp_Object hash_table);
/* Default size for hash tables if not specified. */
@ -4038,7 +4047,6 @@ extern void hexbuf_digest (char *, void const *, int);
extern char *extract_data_from_object (Lisp_Object, ptrdiff_t *, ptrdiff_t *);
EMACS_UINT hash_string (char const *, ptrdiff_t);
EMACS_UINT sxhash (Lisp_Object);
Lisp_Object hashfn_user_defined (Lisp_Object, struct Lisp_Hash_Table *);
Lisp_Object make_hash_table (struct hash_table_test, EMACS_INT,
hash_table_weakness_t, bool);
Lisp_Object hash_table_weakness_symbol (hash_table_weakness_t weak);

View file

@ -2646,34 +2646,26 @@ dump_vectorlike_generic (struct dump_context *ctx,
return offset;
}
/* Return a vector of KEY, VALUE pairs in the given hash table H. The
first H->count pairs are valid, and the rest are unbound. */
/* Return a vector of KEY, VALUE pairs in the given hash table H.
No room for growth is included. */
static Lisp_Object
hash_table_contents (struct Lisp_Hash_Table *h)
{
if (h->test.hashfn == hashfn_user_defined)
error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */
ptrdiff_t size = HASH_TABLE_SIZE (h);
ptrdiff_t old_size = HASH_TABLE_SIZE (h);
ptrdiff_t size = h->count;
Lisp_Object key_and_value = make_uninit_vector (2 * size);
ptrdiff_t n = 0;
/* Make sure key_and_value ends up in the same order; charset.c
relies on it by expecting hash table indices to stay constant
across the dump. */
for (ptrdiff_t i = 0; i < size; i++)
for (ptrdiff_t i = 0; i < old_size; i++)
if (!NILP (HASH_HASH (h, i)))
{
ASET (key_and_value, n++, HASH_KEY (h, i));
ASET (key_and_value, n++, HASH_VALUE (h, i));
}
while (n < 2 * size)
{
ASET (key_and_value, n++, Qunbound);
ASET (key_and_value, n++, Qnil);
}
return key_and_value;
}
@ -2686,25 +2678,32 @@ dump_hash_table_list (struct dump_context *ctx)
return 0;
}
static hash_table_std_test_t
hash_table_std_test (const struct hash_table_test *t)
{
if (BASE_EQ (t->name, Qeq))
return Test_eq;
if (BASE_EQ (t->name, Qeql))
return Test_eql;
if (BASE_EQ (t->name, Qequal))
return Test_equal;
error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */
}
/* Compact contents and discard inessential information from a hash table,
preparing it for dumping.
See `hash_table_thaw' for the code that restores the object to a usable
state. */
static void
hash_table_freeze (struct Lisp_Hash_Table *h)
{
ptrdiff_t npairs = ASIZE (h->key_and_value) / 2;
h->key_and_value = hash_table_contents (h);
h->next = h->hash = make_fixnum (npairs);
h->index = make_fixnum (ASIZE (h->index));
h->next_free = (npairs == h->count ? -1 : h->count);
}
static void
hash_table_thaw (Lisp_Object hash)
{
struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
h->hash = make_nil_vector (XFIXNUM (h->hash));
h->next = Fmake_vector (h->next, make_fixnum (-1));
h->index = Fmake_vector (h->index, make_fixnum (-1));
hash_table_rehash (hash);
eassert (ASIZE (h->key_and_value) == h->count * 2);
h->next = Qnil;
h->hash = Qnil;
h->index = Qnil;
h->count = 0;
h->frozen_test = hash_table_std_test (&h->test);
}
static dump_off
@ -2724,19 +2723,11 @@ dump_hash_table (struct dump_context *ctx, Lisp_Object object)
dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header);
/* TODO: dump the hash bucket vectors synchronously here to keep
them as close to the hash table as possible. */
DUMP_FIELD_COPY (out, hash, count);
DUMP_FIELD_COPY (out, hash, next_free);
DUMP_FIELD_COPY (out, hash, weakness);
DUMP_FIELD_COPY (out, hash, purecopy);
DUMP_FIELD_COPY (out, hash, mutable);
DUMP_FIELD_COPY (out, hash, frozen_test);
dump_field_lv (ctx, out, hash, &hash->key_and_value, WEIGHT_STRONG);
dump_field_lv (ctx, out, hash, &hash->test.name, WEIGHT_STRONG);
dump_field_lv (ctx, out, hash, &hash->test.user_hash_function,
WEIGHT_STRONG);
dump_field_lv (ctx, out, hash, &hash->test.user_cmp_function,
WEIGHT_STRONG);
dump_field_emacs_ptr (ctx, out, hash, &hash->test.cmpfn);
dump_field_emacs_ptr (ctx, out, hash, &hash->test.hashfn);
eassert (hash->next_weak == NULL);
return finish_dump_pvec (ctx, &out->header);
}