1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2026-01-01 09:51:22 -08:00

Implement the global symbol table as a weak-value hash table from strings to symbols.

Copied from Perforce
 Change: 180294
 ServerID: perforce.ravenbrook.com
This commit is contained in:
Gareth Rees 2012-11-02 21:23:12 +00:00
parent d18dd95c1f
commit 7f34f0e8cb
3 changed files with 115 additions and 153 deletions

View file

@ -24,8 +24,6 @@
*
*
* MPS TO DO LIST
* - make the symbol table weak to show how to use weak references
* - add Scheme operators for talking to the MPS, forcing GC etc.
* - make an mps_perror
*
*
@ -122,8 +120,7 @@ typedef struct pair_s {
typedef struct symbol_s {
type_t type; /* TYPE_SYMBOL */
size_t length; /* length of symbol string (excl. NUL) */
char string[1]; /* symbol string, NUL terminated */
obj_t name; /* its name (a string) */
} symbol_s;
typedef struct integer_s {
@ -284,19 +281,14 @@ static size_t total;
/* symtab -- symbol table %%MPS
*
* The symbol table is a hash-table containing objects of TYPE_SYMBOL.
* When a string is "interned" it is looked up in the table, and added
* only if it is not there. This guarantees that all symbols which
* are equal are actually the same object.
*
* The symbol table is simply a malloc'd array of obj_t pointers. Since
* it's outside the MPS and refers to objects we want the MPS to keep
* alive, it must be declared to the MPS as a root. Search for
* occurrences of `symtab_root` to see how this is done.
* The symbol table is a weak-value hashtable mapping objects of
* TYPE_STRING to objects of TYPE_SYMBOL. When a string is "interned"
* it is looked up in the table, and added only if it is not there.
* This guarantees that all symbols which are equal are actually the
* same object.
*/
static obj_t *symtab;
static size_t symtab_size;
static obj_t symtab;
static mps_root_t symtab_root;
@ -497,19 +489,19 @@ static obj_t make_integer(long integer)
return obj;
}
static obj_t make_symbol(size_t length, char string[])
static obj_t make_symbol(obj_t name)
{
obj_t obj;
mps_addr_t addr;
size_t size = ALIGN(offsetof(symbol_s, string) + length+1);
size_t size = ALIGN(sizeof(symbol_s));
assert(TYPE(name) == TYPE_STRING);
do {
mps_res_t res = mps_reserve(&addr, leaf_ap, size);
mps_res_t res = mps_reserve(&addr, obj_ap, size);
if (res != MPS_RES_OK) error("out of memory in make_symbol");
obj = addr;
obj->symbol.type = TYPE_SYMBOL;
obj->symbol.length = length;
memcpy(obj->symbol.string, string, length+1);
} while(!mps_commit(leaf_ap, addr, size));
obj->symbol.name = name;
} while(!mps_commit(obj_ap, addr, size));
total += size;
return obj;
}
@ -732,91 +724,6 @@ static unsigned long hash(const char *s, size_t length) {
}
/* find -- find entry for symbol in symbol table
*
* Look for a symbol matching the string in the symbol table.
* If the symbol was found, returns the address of the symbol
* table entry which points to the symbol. Otherwise it
* either returns the address of a NULL entry into which the
* new symbol should be inserted, or NULL if the symbol table
* is full.
*/
static obj_t *find(char *string) {
unsigned long i, h, probe;
h = hash(string, strlen(string));
probe = (h >> 8) | 1;
h &= (symtab_size-1);
i = h;
do {
if(symtab[i] == NULL ||
strcmp(string, symtab[i]->symbol.string) == 0)
return &symtab[i];
i = (i+probe) & (symtab_size-1);
} while(i != h);
return NULL;
}
/* rehash -- double size of symbol table */
static void rehash(void) {
obj_t *old_symtab = symtab;
unsigned old_symtab_size = symtab_size;
mps_root_t old_symtab_root = symtab_root;
unsigned i;
mps_res_t res;
symtab_size *= 2;
symtab = malloc(sizeof(obj_t) * symtab_size);
if(symtab == NULL) error("out of memory");
/* Initialize the new table to NULL so that "find" will work. */
for(i = 0; i < symtab_size; ++i)
symtab[i] = NULL;
/* Once the symbol table is initialized with scannable references (NULL
in this case) we must register it as a root before we copy objects
across from the old symbol table. The MPS might be moving objects
in memory at any time, and will arrange that both copies are updated
atomically to the mutator (this interpreter). */
res = mps_root_create_table(&symtab_root, arena, mps_rank_exact(), 0,
(mps_addr_t *)symtab, symtab_size);
if(res != MPS_RES_OK) error("Couldn't register new symtab root");
for(i = 0; i < old_symtab_size; ++i)
if(old_symtab[i] != NULL) {
obj_t *where = find(old_symtab[i]->symbol.string);
assert(where != NULL); /* new table shouldn't be full */
assert(*where == NULL); /* shouldn't be in new table */
*where = old_symtab[i];
}
mps_root_destroy(old_symtab_root);
free(old_symtab);
}
/* union-find string in symbol table, rehashing if necessary */
static obj_t intern(char *string) {
obj_t *where;
where = find(string);
if(where == NULL) {
rehash();
where = find(string);
assert(where != NULL); /* shouldn't be full after rehash */
}
if(*where == NULL) /* symbol not found in table */
*where = make_symbol(strlen(string), string);
return *where;
}
/* Hash table implementation */
static unsigned long eq_hash(obj_t obj)
@ -1030,6 +937,33 @@ static void table_delete(obj_t tbl, obj_t key)
}
static obj_t intern_string(obj_t name)
{
obj_t symbol;
assert(TYPE(name) == TYPE_STRING);
symbol = table_ref(symtab, name);
if(symbol == NULL) {
symbol = make_symbol(name);
table_set(symtab, name, symbol);
}
return symbol;
}
static obj_t intern(char *string)
{
return intern_string(make_string(strlen(string), string));
}
static char *symbol_name(obj_t symbol)
{
assert(TYPE(symbol) == TYPE_SYMBOL);
assert(TYPE(symbol->symbol.name) == TYPE_STRING);
return symbol->symbol.name->string.string;
}
static void print(obj_t obj, unsigned depth, FILE *stream)
{
switch(TYPE(obj)) {
@ -1038,7 +972,7 @@ static void print(obj_t obj, unsigned depth, FILE *stream)
} break;
case TYPE_SYMBOL: {
fputs(obj->symbol.string, stream);
fputs(symbol_name(obj), stream);
} break;
case TYPE_SPECIAL: {
@ -1481,7 +1415,7 @@ static obj_t eval(obj_t env, obj_t op_env, obj_t exp)
if(TYPE(exp) == TYPE_SYMBOL) {
obj_t binding = lookup(env, exp);
if(binding == obj_undefined)
error("eval: unbound symbol \"%s\"", exp->symbol.string);
error("eval: unbound symbol \"%s\"", symbol_name(exp));
return CDR(binding);
}
@ -1969,7 +1903,7 @@ static obj_t entry_quasiquote(obj_t env, obj_t op_env, obj_t operator, obj_t ope
CAAR(list) == obj_unquote_splic)) {
unless(TYPE(CDAR(list)) == TYPE_PAIR &&
CDDAR(list) == obj_empty)
error("%s: illegal %s syntax", operator->operator.name, CAAR(list)->symbol.string);
error("%s: illegal %s syntax", operator->operator.name, symbol_name(CAAR(list)));
insert = eval(env, op_env, CADAR(list));
if(CAAR(list) == obj_unquote) {
pair = make_pair(insert, obj_empty);
@ -2030,7 +1964,7 @@ static obj_t entry_set(obj_t env, obj_t op_env, obj_t operator, obj_t operands)
binding = lookup(env, symbol);
if(binding == obj_undefined)
error("%s: applied to unbound symbol \"%s\"",
operator->operator.name, symbol->symbol.string);
operator->operator.name, symbol_name(symbol));
value = eval(env, op_env, CADR(operands));
CDR(binding) = value;
return value;
@ -2980,13 +2914,24 @@ static obj_t entry_eval(obj_t env, obj_t op_env, obj_t operator, obj_t operands)
}
static obj_t entry_error(obj_t env, obj_t op_env, obj_t operator, obj_t operands)
{
obj_t msg;
eval_args(operator->operator.name, env, op_env, operands, 1, &msg);
unless(TYPE(msg) == TYPE_STRING)
error("%s: argument must be a string", operator->operator.name);
error(msg->string.string);
return obj_undefined;
}
static obj_t entry_symbol_to_string(obj_t env, obj_t op_env, obj_t operator, obj_t operands)
{
obj_t symbol;
eval_args(operator->operator.name, env, op_env, operands, 1, &symbol);
unless(TYPE(symbol) == TYPE_SYMBOL)
error("%s: argument must be a symbol", operator->operator.name);
return make_string(symbol->symbol.length, symbol->symbol.string);
return symbol->symbol.name; /* safe because strings are immutable */
}
@ -2996,8 +2941,7 @@ static obj_t entry_string_to_symbol(obj_t env, obj_t op_env, obj_t operator, obj
eval_args(operator->operator.name, env, op_env, operands, 1, &string);
unless(TYPE(string) == TYPE_STRING)
error("%s: argument must be a string", operator->operator.name);
/* TODO: Should pass length to intern to avoid problems with NUL termination. */
return intern(string->string.string);
return intern_string(string);
}
@ -3664,6 +3608,7 @@ static struct {char *name; entry_t entry;} funtab[] = {
{"list->vector", entry_list_to_vector},
{"vector-fill!", entry_vector_fill},
{"eval", entry_eval},
{"error", entry_error},
{"symbol->string", entry_symbol_to_string},
{"string->symbol", entry_string_to_symbol},
{"string?", entry_stringp},
@ -3746,8 +3691,8 @@ static mps_res_t obj_scan(mps_ss_t ss, mps_addr_t base, mps_addr_t limit)
base = (char *)base + ALIGN(sizeof(integer_s));
break;
case TYPE_SYMBOL:
base = (char *)base +
ALIGN(offsetof(symbol_s, string) + obj->symbol.length + 1);
FIX(obj->symbol.name);
base = (char *)base + ALIGN(sizeof(symbol_s));
break;
case TYPE_SPECIAL:
base = (char *)base + ALIGN(sizeof(special_s));
@ -3829,8 +3774,7 @@ static mps_addr_t obj_skip(mps_addr_t base)
base = (char *)base + ALIGN(sizeof(integer_s));
break;
case TYPE_SYMBOL:
base = (char *)base +
ALIGN(offsetof(symbol_s, string) + obj->symbol.length + 1);
base = (char *)base + ALIGN(sizeof(symbol_s));
break;
case TYPE_SPECIAL:
base = (char *)base + ALIGN(sizeof(special_s));
@ -3985,7 +3929,6 @@ static mps_res_t buckets_scan(mps_ss_t ss, mps_addr_t base, mps_addr_t limit)
if (p == NULL && buckets->dependent) {
/* key/value was splatted: splat value/key too */
p = obj_deleted;
puts("splat!");
buckets->dependent->bucket[i] = p;
buckets->deleted += 2; /* tagged */
buckets->dependent->deleted += 2; /* tagged */
@ -4155,23 +4098,20 @@ static void *start(void *p, size_t s)
mps_root_t globals_root;
total = (size_t)0;
error_handler = &jb;
symtab_size = 16;
symtab = malloc(sizeof(obj_t) * symtab_size);
if(symtab == NULL) error("out of memory");
for(i = 0; i < symtab_size; ++i)
symtab[i] = NULL;
/* Note that since the symbol table is an exact root we must register
it with the MPS only after it has been initialized with scannable
pointers -- NULL in this case. Random values look like false
references into MPS memory and cause undefined behaviour (most likely
assertion failures). See topic/root. */
/* We must register the global variable 'symtab' as a root before
creating the symbol table, otherwise the symbol table might be
collected in the interval between creation and registration. But
we must also ensure that 'symtab' is valid before registration
(in this case, by setting it to NULL). See topic/root. */
symtab = NULL;
res = mps_root_create_table(&symtab_root, arena, mps_rank_exact(), 0,
(mps_addr_t *)symtab, symtab_size);
(mps_addr_t *)&symtab, 1);
if(res != MPS_RES_OK) error("Couldn't register symtab root");
error_handler = &jb;
/* The symbol table is strong-key weak-value. */
symtab = make_table(16, string_hash, string_equalp, 0, 1);
/* By contrast with the symbol table, we *must* register the globals as
roots before we start making things to put into them, because making