1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2026-01-13 06:50:39 -08:00
emacs/mps/code/table.c
Richard Brooksby 46e49ccff6 Make white table global, maintained in segsetwhite. it can now be shared between traces.
Copied from Perforce
 Change: 190133
 ServerID: perforce.ravenbrook.com
2016-03-17 08:14:37 +00:00

619 lines
17 KiB
C

/* table.c: A dictionary mapping a Word to a void*
*
* $Id$
* Copyright (c) 2001-2016 Ravenbrook Limited. See end of file for license.
*/
#include "table.h"
#include "mpm.h"
#include <stddef.h>
SRCID(table, "$Id$");
/* Number of fresh random hash functions tableRehash tries before it
   gives up on the current maximum chain length and lengthens it. */
#define TABLE_REHASH_TRIES 8
/* Number of slots in the table's array.  Only the base-2 logarithm is
   stored, so the length is always a power of two. */
#define TableLength(table) ((Count)1 << (table)->log2length)
/* TableCheck -- check the consistency of a table
 *
 * Checks the signature and the structural invariants the rest of this
 * file relies on.  Returns TRUE when control reaches the end of the
 * checks.
 */
Bool TableCheck(Table table)
{
CHECKS(Table, table);
/* there cannot be more mappings than slots */
CHECKL(table->count <= TableLength(table));
CHECKL(table->array != NULL);
/* TableCreate starts with chains of 2 and tableRehash only ever
   lengthens them */
CHECKL(table->maxChainLength >= 2);
CHECKL(table->maxChainLength <= TableLength(table));
CHECKL(FUNCHECK(table->alloc));
CHECKL(FUNCHECK(table->free));
/* can't check allocClosure -- it could be anything */
/* the two sentinel keys must be distinguishable from each other */
CHECKL(table->unusedKey != table->deletedKey);
/* tableHashParamInit always sets the low bit of each multiplicand */
CHECKL((table->posHashParamStruct.multiplicand & 1) != 0);
CHECKL((table->skipHashParamStruct.multiplicand & 1) != 0);
return TRUE;
}
/* <https://en.wikipedia.org/wiki/Universal_hashing> */
/* FIXME: This is a copy of the random number generator from
testlib.c. It probably needs importing into the MPS properly,
perhaps in its own module, and the unit tests moving out of
testlib.c as well. */
/* Current state of the generator; also the most recently returned value. */
static unsigned long tableSeed = 1;

#define R_m 2147483647UL /* modulus, 2^31 - 1 */
#define R_a 48271UL      /* multiplier */

/* tableRnd -- return the next pseudo-random number
 *
 * Advances tableSeed to (R_a * tableSeed) mod R_m without needing more
 * than 32 bits of intermediate precision: the seed is split into its
 * low 15 bits and the remaining high bits, each part is multiplied
 * separately, and the high product is folded back in using
 * 2^31 == 1 (mod R_m).
 */
static unsigned long tableRnd(void)
{
  /* requires m == 2^31-1, a < 2^16 */
  unsigned long seed = tableSeed;
  unsigned long lowBits = seed & 0x7FFF;
  unsigned long highBits = seed >> 15;
  unsigned long bot = R_a * lowBits;
  unsigned long top = R_a * highBits;
  unsigned long next = bot + ((top & 0xFFFF) << 15) + (top >> 16);

  /* a single subtraction is enough to bring the sum back into range */
  if (next > R_m)
    next -= R_m;

  /* Have you modified this code? Run rnd_verify(3) please! RHSK */
  tableSeed = next;
  return next;
}
static Word tableRnd64(void)
{
return ((Word)tableRnd() << 32) | (Word)tableRnd();
}
/* tableHashParamInit -- pick random parameters for one hash function
 *
 * Draws the multiplicand first and the addend second.  The multiplicand
 * is forced to be odd, which is what TableCheck requires of it.
 */
static void tableHashParamInit(TableHashParam param)
{
  Word multiplicand = tableRnd64();
  Word addend = tableRnd64();

  param->multiplicand = multiplicand | 1;
  param->addend = addend;
}
/* tableHash -- hash a key to a value below 2^log2length
 *
 * Multiply-add hashing: computes key * multiplicand + addend in Word
 * arithmetic (wrapping on overflow) and keeps only the top log2length
 * bits of the result.
 */
static Word tableHash(TableHashParam param, TableKey key, Shift log2length)
{
  size_t discarded = sizeof(Word) * CHAR_BIT - log2length;
  Word mixed = key * param->multiplicand;

  mixed += param->addend;
  return mixed >> discarded;
}
/* tableFind -- finds the entry for this key, or NULL
 *
 * (tableFind itself is defined further down, after its probing helpers
 * tablePos and tableStep.)
 *
 * .worst: In the worst case, this looks at every slot before giving up,
 * but that's what you have to do in a closed hash table, to make sure
 * that all the items still fit in after growing the table.
 */
/* tablePos -- compute where a key's probe chain starts and how it steps
 *
 * Two independent hash functions are used: one gives the first slot to
 * look at, the other gives the stride between successive slots.  The
 * stride is forced odd so that, the table length being a power of two,
 * stepping repeatedly visits every slot before returning to the start.
 */
static void tablePos(Index *posReturn, Count *skipReturn, Table table, TableKey key)
{
  Shift bits = table->log2length;
  Word start = tableHash(&table->posHashParamStruct, key, bits);
  Word stride = tableHash(&table->skipHashParamStruct, key, bits);

  *posReturn = start;
  *skipReturn = stride | 1;
}
/* tableStep -- advance along a probe chain, wrapping at the table end
 *
 * The mask is equivalent to a modulo only because length is a power of
 * two.
 */
static Index tableStep(Index pos, Count skip, Count length)
{
  Count mask = length - 1;
  Index next = pos + skip;

  return next & mask;
}
/* Probes at most maxChainLength slots along the key's chain and returns
   the slot holding exactly this key, or NULL if none of them does. */
static TableEntry tableFind(Table table, TableKey key)
{
  Count length = TableLength(table);
  Count remaining;
  Index pos;
  Count skip;

  tablePos(&pos, &skip, table, key);
  for (remaining = table->maxChainLength; remaining > 0; --remaining) {
    TableEntry candidate = &table->array[pos];

    if (candidate->key == key)
      return candidate;
    pos = tableStep(pos, skip, length);
  }

  return NULL;
}
#ifdef TABLE_DEBUG
/* tableFindBrute -- debugging aid: is the key in ANY slot of the table?
 *
 * Ignores the hash functions entirely and scans the whole array.
 */
static Bool tableFindBrute(Table table, TableKey key)
{
  Index slot = 0;

  while (slot < TableLength(table)) {
    if (table->array[slot].key == key) {
      return TRUE;
    }
    ++slot;
  }
  return FALSE;
}
#endif
/* tablePut -- put a key/value pair into the table
 *
 * Attempts to add a key/value pair to the table.  Returns ResFAIL if
 * the key is already present in its probe chain, or ResLIMIT if it was
 * not possible to place the key in a chain not longer than
 * maxChainLength hops using the current hash function.
 *
 * The whole chain is examined for a duplicate before a free slot is
 * used.  Stopping at the first free slot is not enough: TableRemove
 * can free a slot that lies earlier in the chain than an existing
 * entry for the same key, and the key would then be stored twice.
 *
 * Uses a displacing mechanism similar to a cuckoo hash to move
 * entries out of the way, in order to guarantee O(1) lookups.  If
 * insert fails with ResLIMIT, *keyIO and *valueIO contain a displaced
 * key/value pair, which is no longer in the table.  On any other
 * result *keyIO and *valueIO are not written.
 */
static Res tablePut(Table table, TableKey *keyIO, TableValue *valueIO)
{
  Index j;
  TableKey key = *keyIO;
  TableValue value = *valueIO;
  Count length = TableLength(table);

  for (j = 0; j < length; ++j) { /* detect cycle */
    Index i;
    Index pos, last;
    Index freePos = 0;
    Bool haveFree = FALSE;
    Count skip;
    TableKey tk;
    TableValue tv;

#ifdef TABLE_DEBUG
    AVER(!tableFindBrute(table, key));
#endif

    tablePos(&pos, &skip, table, key);
    last = pos;
    for (i = 0; i < table->maxChainLength; ++i) {
      tk = table->array[pos].key;
      if (tk == table->unusedKey || tk == table->deletedKey) {
        /* Remember only the first free slot, but keep scanning for a
           duplicate further along the chain. */
        if (!haveFree) {
          freePos = pos;
          haveFree = TRUE;
        }
      } else if (tk == key) {
        return ResFAIL;
      }
      last = pos;
      pos = tableStep(pos, skip, length);
    }

    if (haveFree) {
      table->array[freePos].key = key;
      table->array[freePos].value = value;
      ++table->count;
      return ResOK;
    }

    /* Chain is full. Kick out last slot and try to insert it elsewhere. */
    tk = table->array[last].key;
    tv = table->array[last].value;
    AVER(tk != key);
    AVER(tk != table->unusedKey);
    AVER(tk != table->deletedKey);
    table->array[last].key = key;
    table->array[last].value = value;
    key = tk;
    value = tv;
  }

  /* Cycle detected. Give up. */
  *keyIO = key;
  *valueIO = value;
  return ResLIMIT;
}
/* tablePutSomewhere -- force a key/value pair into any free slot
 *
 * Stores the pair in the first unused or deleted slot of the array,
 * regardless of where the hash functions say it belongs.  This makes
 * the table invalid, since the key is unlikely to be found there.  It
 * is intended for getting a pair back into the table just before a
 * rehash.  The table must have at least one free slot and must not
 * already contain the key; both are asserted.
 */
static void tablePutSomewhere(Table table, TableKey key, TableValue value)
{
  Index i;

  AVER(table->count < TableLength(table));

  /* Look for the first free slot, checking the key is absent on the way. */
  i = 0;
  while (i < TableLength(table)) {
    TableKey tk = table->array[i].key;
    AVER(tk != key);
    if (tk == table->unusedKey || tk == table->deletedKey)
      break;
    ++i;
  }
  if (i >= TableLength(table)) {
    NOTREACHED;
  }

  table->array[i].key = key;
  table->array[i].value = value;
  ++table->count;

  /* Check the key is absent from the rest of the array too. */
  for (i++; i < TableLength(table); ++i) {
    AVER(table->array[i].key != key);
  }
}
/* tableClear -- mark every slot of the table's array as unused
 *
 * Only the keys are written; values and the mapping count are left as
 * they are.
 */
static void tableClear(Table table)
{
  TableKey unusedKey = table->unusedKey;
  TableEntry entry = table->array;
  TableEntry limit = entry + TableLength(table);

  for (; entry < limit; ++entry) {
    entry->key = unusedKey;
  }
}
/* tableRehashTry -- make one attempt to rehash the table
 *
 * Generate a new random hash function and make a pass over the table
 * rehashing in place.  Returns ResLIMIT if it was not possible to
 * rehash with chains not longer than maxChainLength hops using the
 * hash function.  In that case the pair that could not be placed is
 * forced back into a free slot, so no mapping is lost, but the table
 * is not valid for lookups until a later attempt succeeds.
 *
 * Slots holding deletedKey are tombstones, not mappings: they are
 * reset to unusedKey and never re-inserted.  Re-inserting them would
 * let a tombstone displace live entries, and would trip the duplicate
 * check in tablePutSomewhere as soon as two tombstones exist.
 */
static Res tableRehashTry(Table table)
{
  Index i;
  Count length = TableLength(table);
  TableEntry array = table->array;

  tableHashParamInit(&table->posHashParamStruct);
  tableHashParamInit(&table->skipHashParamStruct);

  for (i = 0; i < length; ++i) {
    TableKey key = array[i].key;
    TableValue value;
    Res res;

    if (key == table->unusedKey)
      continue;

    if (key == table->deletedKey) {
      /* Not counted in table->count (TableRemove already decremented
         it), so just reclaim the slot. */
      array[i].key = table->unusedKey;
      continue;
    }

    /* Lift the entry out and put it back under the new hash function. */
    value = array[i].value;
    array[i].key = table->unusedKey;
    --table->count;
    res = tablePut(table, &key, &value);
    if (res != ResOK) {
      AVER(res != ResFAIL); /* duplicate key */
      /* key/value may now be a displaced pair; keep it in the table */
      tablePutSomewhere(table, key, value);
      return res;
    }
  }

  return ResOK;
}
/* tableRehash -- rehash the table, making multiple attempts
 *
 * Tries TABLE_REHASH_TRIES random hash functions in a row.  If none of
 * them works, the maximum chain length is increased by one and another
 * round of attempts is made.  This is guaranteed to terminate since
 * the chain length will eventually equal the table length, resulting
 * in a very poor hash table.  However, this is extraordinarily
 * unlikely.
 *
 * TODO: Experiment with growing the table instead of extending the
 * chain length, to keep lookup times down.  In fact, that may support
 * a fixed chain length of 2 and a faster cuckoo lookup.
 */
static void tableRehash(Table table)
{
  for (;;) {
    Count triesLeft = TABLE_REHASH_TRIES;

    while (triesLeft > 0) {
      Res res = tableRehashTry(table);
      if (res == ResOK)
        return;
      AVER(res == ResLIMIT);
      --triesLeft;
    }

    /* Every attempt at this chain length failed: allow longer chains. */
    ++table->maxChainLength;
    /* Even in the worst case (a single chain) the maximum chain length
       can't exceed the number of table entries. */
    AVER(table->maxChainLength <= table->count);
  }
}
/* TableGrow -- increase the capacity of the table
 *
 * Ensure the table can accommodate extraCapacity more entries (filled
 * slots) without becoming cramped.  If necessary, resize the array by
 * allocating a new one, copying the old entries across and rehashing
 * them in place.
 *
 * Returns ResOK if the table is (now) big enough, ResLIMIT if the
 * required size cannot be represented, or ResMEMORY if the new array
 * could not be allocated.  On ResLIMIT and ResMEMORY the table has not
 * been modified.
 *
 * .hash.spacefraction: As with all closed hash tables, we must choose
 * an appropriate proportion of slots to remain free.  More free slots
 * help avoid large-sized contiguous clumps of full cells and their
 * associated linear search costs.
 *
 * .hash.initial: Any reasonable number.
 *
 * .hash.growth: A compromise between space inefficiency (growing bigger
 * than required) and time inefficiency (growing too slowly, with all
 * the rehash costs at every step).  A factor of 2 means that at the
 * point of growing to a size X table, hash-work equivalent to filling
 * a size-X table has already been done.  So we do at most 2x the
 * hash-work we would have done if we had been able to guess the right
 * table size initially.
 *
 * The quantities computed below maintain this relation:
 *   table->count <= required <= minimum <= newLength
 */
#define SPACEFRACTION 0.5 /* .hash.spacefraction */

Res TableGrow(Table table, Count extraCapacity)
{
  TableEntry oldArray, newArray;
  Count oldLength, newLength;
  Count required, minimum;
  Count oldCount;
  double scaled;

  required = table->count + extraCapacity;
  if (required < table->count) /* overflow? */
    return ResLIMIT;

  /* Calculate the minimum table length that would allow for the required
     capacity without growing again.  The range is checked before the
     conversion, because converting an out-of-range double to an integer
     type is undefined.  (Relies on Count being unsigned, as the
     wrap-around check above already does.) */
  scaled = (double)required / SPACEFRACTION;
  if (scaled >= (double)((Count)-1))
    return ResLIMIT;
  minimum = (Count)scaled;
  if (minimum < required) /* overflow? */
    return ResLIMIT;

  /* Double the table length until it is at least the minimum */
  oldLength = TableLength(table);
  newLength = oldLength;
  while (newLength < minimum) {
    Count doubled = newLength > 0 ? newLength * 2 : 1; /* .hash.growth */
    if (doubled <= newLength) /* overflow? */
      return ResLIMIT;
    newLength = doubled;
  }

  if (newLength == oldLength) /* already enough space? */
    return ResOK;

  /* The byte size passed to the allocator must not wrap around. */
  if (newLength > (size_t)-1 / sizeof(TableEntryStruct))
    return ResLIMIT;

  /* TODO: An event would be good here */

  oldArray = table->array;
  newArray = table->alloc(table->allocClosure,
                          sizeof(TableEntryStruct) * newLength);
  if (newArray == NULL)
    return ResMEMORY;

  /* Switch to the new array: mark every slot unused, copy the old
     entries into the first oldLength slots, then rehash in place. */
  table->log2length = SizeLog2(newLength);
  table->array = newArray;
  oldCount = table->count;
  tableClear(table);
  mps_lib_memcpy(newArray, oldArray, sizeof(TableEntryStruct) * oldLength);
  tableRehash(table);
  AVER(table->count == oldCount);

  if (oldLength > 0) {
    AVER(oldArray != NULL);
    table->free(table->allocClosure,
                oldArray,
                sizeof(TableEntryStruct) * oldLength);
  }

  return ResOK;
}
/* TableCreate -- makes a new table
 *
 * Allocates the table structure and an array of length slots using
 * tableAlloc(allocClosure, size), picks initial random hash functions
 * and marks every slot unused.  tableFree and allocClosure are
 * remembered and used for all later allocation and freeing.
 *
 * unusedKey and deletedKey are two distinct key values reserved for
 * marking empty and removed slots; they cannot be used as real keys.
 *
 * Only the base-2 logarithm of length is stored, so length is
 * presumably required to be a power of two (confirm against SizeLog2);
 * TableCheck additionally requires at least 2 slots.
 *
 * On success stores the table in *tableReturn and returns ResOK.
 * Returns ResMEMORY, with nothing left allocated, if either allocation
 * fails.
 */
extern Res TableCreate(Table *tableReturn,
                       Count length,
                       TableAllocFunction tableAlloc,
                       TableFreeFunction tableFree,
                       void *allocClosure,
                       TableKey unusedKey,
                       TableKey deletedKey)
{
  Table table;
  Res res;

  AVER(tableReturn != NULL);
  AVER(FUNCHECK(tableAlloc));
  AVER(FUNCHECK(tableFree));
  AVER(unusedKey != deletedKey);

  table = tableAlloc(allocClosure, sizeof(TableStruct));
  if (table == NULL)
    return ResMEMORY;

  table->log2length = SizeLog2(length);
  table->count = 0;
  table->array = NULL;
  table->alloc = tableAlloc;
  table->free = tableFree;
  table->allocClosure = allocClosure;
  table->unusedKey = unusedKey;
  table->deletedKey = deletedKey;
  table->maxChainLength = 2;

  table->array = tableAlloc(allocClosure, sizeof(TableEntryStruct) * length);
  if (table->array == NULL) {
    res = ResMEMORY;
    goto failArrayAlloc;
  }

  tableHashParamInit(&table->posHashParamStruct);
  tableHashParamInit(&table->skipHashParamStruct);

  table->sig = TableSig;
  tableClear(table);

  AVERT(Table, table);
  *tableReturn = table;
  return ResOK;

failArrayAlloc:
  /* Free the table structure with the size it was allocated with (it
     was previously freed with the size of the entry array). */
  tableFree(allocClosure, table, sizeof(TableStruct));
  return res;
}
/* TableDestroy -- destroy a table
 *
 * Frees the slot array and then the table structure itself, using the
 * free function and closure the table was created with.  The signature
 * is invalidated before the structure is released.
 */
extern void TableDestroy(Table table)
{
  TableEntry array;
  Count length;

  AVER(table != NULL);

  array = table->array;
  length = TableLength(table);
  table->free(table->allocClosure, array, sizeof(TableEntryStruct) * length);

  table->sig = SigInvalid;
  table->free(table->allocClosure, table, sizeof(TableStruct));
}
/* TableLookup -- look up the value mapped to a key
 *
 * Returns TRUE and stores the value in *valueReturn if the key has a
 * mapping; returns FALSE and leaves *valueReturn alone otherwise.  The
 * key must not be either of the table's reserved keys.
 */
extern Bool TableLookup(TableValue *valueReturn, Table table, TableKey key)
{
  TableEntry found;

  AVERT(Table, table);
  AVER(key != table->unusedKey);
  AVER(key != table->deletedKey);

  found = tableFind(table, key);
  if (found != NULL) {
    *valueReturn = found->value;
    return TRUE;
  }
  return FALSE;
}
/* TableDefine -- add a new mapping
 *
 * Returns ResOK if the mapping was added, ResFAIL if tablePut found
 * the key already present, or the error from TableGrow if the key did
 * not fit and the table could not be grown.  The key must not be
 * either of the table's reserved keys; the value is arbitrary.
 *
 * When tablePut returns ResLIMIT it has already stored the new pair
 * and hands back a different, displaced pair that is no longer in the
 * table.  If TableGrow then fails, that displaced pair is put back in
 * place of the new one before returning, so that a failed define
 * leaves the table holding the same mappings as before rather than
 * silently losing one.
 */
extern Res TableDefine(Table table, TableKey key, TableValue value)
{
  Res res;
  TableKey origKey = key;
  TableValue origValue = value;

  AVERT(Table, table);
  AVER(key != table->unusedKey);
  AVER(key != table->deletedKey);
  /* value is arbitrary */

  res = tablePut(table, &key, &value);
  if (res == ResLIMIT) {
    res = TableGrow(table, 1);
    if (res != ResOK) {
      /* If the pair handed back is the original one, nothing was lost.
         Otherwise the original pair is in the table and (key, value)
         is a displaced pair: swap them back. */
      if (key != origKey) {
        TableEntry entry = tableFind(table, origKey);
        AVER(entry != NULL);
        if (entry != NULL) {
          entry->key = key;
          entry->value = value;
          /* the restored pair is not on its own chain; make it findable */
          tableRehash(table);
        }
      }
      return res;
    }
    res = tablePut(table, &key, &value);
    if (res == ResLIMIT) { /* doesn't fit with current hash */
      tablePutSomewhere(table, key, value);
      tableRehash(table);
      res = ResOK;
    }
  }

  AVER(res == ResOK || key == origKey);
  AVER(res == ResOK || value == origValue);
  return res;
}
/* TableRedefine -- redefine an existing mapping
 *
 * Replaces the value mapped to a key that is already in the table.
 * Returns ResFAIL, changing nothing, if the key has no mapping.  The
 * key must not be either of the table's reserved keys; the value is
 * arbitrary.
 */
extern Res TableRedefine(Table table, TableKey key, TableValue value)
{
  TableEntry entry;

  AVERT(Table, table);
  AVER(key != table->unusedKey);
  AVER(key != table->deletedKey);
  /* value is arbitrary */

  entry = tableFind(table, key);
  if (entry != NULL) {
    AVER(entry->key == key);
    entry->value = value;
    return ResOK;
  }
  return ResFAIL;
}
/* TableRemove -- remove a mapping
 *
 * Marks the key's slot as deleted and decrements the mapping count.
 * Returns ResFAIL, changing nothing, if the key has no mapping.  The
 * key must not be either of the table's reserved keys.
 */
extern Res TableRemove(Table table, TableKey key)
{
  TableEntry victim;

  AVERT(Table, table);
  AVER(key != table->unusedKey);
  AVER(key != table->deletedKey);

  victim = tableFind(table, key);
  if (victim != NULL) {
    victim->key = table->deletedKey;
    --table->count;
    return ResOK;
  }
  return ResFAIL;
}
/* TableMap -- apply a function to all the mappings
 *
 * Calls visit(closure, key, value) for every slot that holds a real
 * mapping, in array order.  Unused and deleted slots are skipped.
 *
 * Entries may be deleted during traversal.  (This is depended on by
 * impl.c.trace.reclaim.uniq.)
 */
extern void TableMap(Table table, TableVisitor visit, void *closure)
{
  Index i = 0;

  /* The array and length are re-read every time round, as the visitor
     may modify the table. */
  while (i < TableLength(table)) {
    TableEntry entry = &table->array[i];
    TableKey tk = entry->key;

    if (!(tk == table->unusedKey || tk == table->deletedKey)) {
      visit(closure, tk, entry->value);
    }
    i++;
  }
}
/* TableCount -- return the number of mappings in the table
 *
 * This is the running count maintained as mappings are added and
 * removed; the array is not scanned.
 */
extern Count TableCount(Table table)
{
  Count mappings = table->count;

  return mappings;
}
/* C. COPYRIGHT AND LICENSE
*
* Copyright (C) 2001-2016 Ravenbrook Limited <http://www.ravenbrook.com/>.
* All rights reserved. This is an open source license. Contact
* Ravenbrook for commercial licensing options.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Redistributions in any form must be accompanied by information on how
* to obtain complete source code for this software and any accompanying
* software that uses this software. The source code must either be
* included in the distribution or be available for no more than the cost
* of distribution plus a nominal fee, and must be freely redistributable
* under reasonable conditions. For an executable file, complete source
* code means the source code for all modules it contains. It does not
* include source code for modules or files that typically accompany the
* major components of the operating system on which the executable file
* runs.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
* PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/