/* scheme.c -- SCHEME INTERPRETER EXAMPLE FOR THE MEMORY POOL SYSTEM * * Copyright (c) 2001-2014 Ravenbrook Limited. See end of file for license. * * TO DO * - unbounded integers, other number types. * - named let. * - quasiquote: vectors; nested; dotted. * - Lots of library. * - \#foo unsatisfactory in read and print */ #include #include #include #include #include #include #include #include #include /* LANGUAGE EXTENSION */ #define unless(c) if(!(c)) #define LENGTH(array) (sizeof(array) / sizeof(array[0])) /* CONFIGURATION PARAMETERS */ #define SYMMAX ((size_t)255) /* max length of a symbol */ #define MSGMAX ((size_t)255) /* max length of error message */ #define STRMAX ((size_t)255) /* max length of a string */ /* DATA TYPES */ /* obj_t -- scheme object type * * obj_t is a pointer to a union, obj_u, which has members for * each scheme representation. * * The obj_u also has a "type" member. Each representation * structure also has a "type" field first. ANSI C guarantees * that these type fields correspond [section?]. * * Objects are allocated by allocating one of the representation * structures and casting the pointer to it to type obj_t. This * allows objects of different sizes to be represented by the * same type. * * To access an object, check its type by reading TYPE(obj), then * access the fields of the representation, e.g. * if(TYPE(obj) == TYPE_PAIR) fiddle_with(CAR(obj)); */ typedef union obj_u *obj_t; typedef obj_t (*entry_t)(obj_t env, obj_t op_env, obj_t operator, obj_t rands); typedef int type_t; enum { TYPE_PAIR, TYPE_INTEGER, TYPE_SYMBOL, TYPE_SPECIAL, TYPE_OPERATOR, TYPE_STRING, TYPE_PORT, TYPE_PROMISE, TYPE_CHARACTER, TYPE_VECTOR, TYPE_TABLE, TYPE_BUCKETS }; typedef struct type_s { type_t type; } type_s; typedef struct pair_s { type_t type; /* TYPE_PAIR */ obj_t car, cdr; /* first and second projections */ } pair_s; typedef struct symbol_s { type_t type; /* TYPE_SYMBOL */ size_t length; /* length of symbol string (excl. NUL) */ char string[1]; /* symbol string, NUL terminated */ } symbol_s; typedef struct integer_s { type_t type; /* TYPE_INTEGER */ long integer; /* the integer */ } integer_s; typedef struct special_s { type_t type; /* TYPE_SPECIAL */ char *name; /* printed representation, NUL terminated */ } special_s; typedef struct operator_s { type_t type; /* TYPE_OPERATOR */ char *name; /* printed name, NUL terminated */ entry_t entry; /* entry point -- see eval() */ obj_t arguments, body; /* function arguments and code */ obj_t env, op_env; /* closure environments */ } operator_s; typedef struct string_s { type_t type; /* TYPE_STRING */ size_t length; /* number of chars in string */ char string[1]; /* string, NUL terminated */ } string_s; typedef struct port_s { type_t type; /* TYPE_PORT */ obj_t name; /* name of stream */ FILE *stream; } port_s; typedef struct character_s { type_t type; /* TYPE_CHARACTER */ char c; /* the character */ } character_s; typedef struct vector_s { type_t type; /* TYPE_VECTOR */ size_t length; /* number of elements */ obj_t vector[1]; /* vector elements */ } vector_s; typedef unsigned long (*hash_t)(obj_t obj); typedef int (*cmp_t)(obj_t obj1, obj_t obj2); typedef struct table_s { type_t type; /* TYPE_TABLE */ hash_t hash; /* hash function */ cmp_t cmp; /* comparison function */ obj_t buckets; /* hash buckets */ } table_s; typedef struct buckets_s { type_t type; /* TYPE_BUCKETS */ size_t length; /* number of buckets */ size_t used; /* number of buckets in use */ size_t deleted; /* number of deleted buckets */ struct bucket_s { obj_t key, value; } bucket[1]; /* hash buckets */ } buckets_s; typedef union obj_u { type_s type; /* one of TYPE_* */ pair_s pair; symbol_s symbol; integer_s integer; special_s special; operator_s operator; string_s string; port_s port; character_s character; vector_s vector; table_s table; buckets_s buckets; } obj_s; /* structure macros */ #define TYPE(obj) ((obj)->type.type) #define CAR(obj) ((obj)->pair.car) #define CDR(obj) ((obj)->pair.cdr) #define CAAR(obj) CAR(CAR(obj)) #define CADR(obj) CAR(CDR(obj)) #define CDAR(obj) CDR(CAR(obj)) #define CDDR(obj) CDR(CDR(obj)) #define CADDR(obj) CAR(CDDR(obj)) #define CDDDR(obj) CDR(CDDR(obj)) #define CDDAR(obj) CDR(CDAR(obj)) #define CADAR(obj) CAR(CDAR(obj)) /* GLOBAL DATA */ /* total -- total allocated bytes */ static size_t total; /* symtab -- symbol table * * The symbol table is a hash-table containing objects of TYPE_SYMBOL. * When a string is "interned" it is looked up in the table, and added * only if it is not there. This guarantees that all symbols which * are equal are actually the same object. */ static obj_t *symtab; static size_t symtab_size; /* special objects * * These global variables are initialized to point to objects of * TYPE_SPECIAL by main. They are used as markers for various * special purposes. */ static obj_t obj_empty; /* (), the empty list */ static obj_t obj_eof; /* end of file */ static obj_t obj_error; /* error indicator */ static obj_t obj_true; /* #t, boolean true */ static obj_t obj_false; /* #f, boolean false */ static obj_t obj_undefined; /* undefined result indicator */ static obj_t obj_tail; /* tail recursion indicator */ static obj_t obj_deleted; /* deleted key in hashtable */ /* predefined symbols * * These global variables are initialized to point to interned * objects of TYPE_SYMBOL. They have special meaning in the * Scheme language, and are used by the evaluator to parse code. */ static obj_t obj_quote; /* "quote" symbol */ static obj_t obj_quasiquote; /* "quasiquote" symbol */ static obj_t obj_lambda; /* "lambda" symbol */ static obj_t obj_begin; /* "begin" symbol */ static obj_t obj_else; /* "else" symbol */ static obj_t obj_unquote; /* "unquote" symbol */ static obj_t obj_unquote_splic; /* "unquote-splicing" symbol */ /* error handler * * The error_handler variable is initialized to point at a * jmp_buf to which the "error" function longjmps if there is * any kind of error during evaluation. It can be set up by * any enclosing function that wants to catch errors. There * is a default error handler in main, in the read-eval-print * loop. The error function also writes an error message * into "error_message" before longjmping, and this can be * displayed to the user when catching the error. * * [An error code should also be passed so that the error can * be decoded by enclosing code.] */ static jmp_buf *error_handler = NULL; static char error_message[MSGMAX+1]; /* SUPPORT FUNCTIONS */ /* error -- throw an error condition * * The "error" function takes a printf-style format string * and arguments, writes the message into error_message and * longjmps to *error_handler. There must be a setjmp at * the other end to catch the condition and display the * message. */ static void error(char *format, ...) { va_list args; va_start(args, format); vsnprintf(error_message, sizeof error_message, format, args); va_end(args); if (error_handler) { longjmp(*error_handler, 1); } else { fflush(stdout); fprintf(stderr, "Fatal error during initialization: %s\n", error_message); abort(); } } /* make_* -- object constructors * * Each object type has a function here which allocates an * instance of that type. */ static obj_t make_bool(int condition) { return condition ? obj_true : obj_false; } static obj_t make_pair(obj_t car, obj_t cdr) { obj_t obj = (obj_t)malloc(sizeof(pair_s)); if(obj == NULL) error("out of memory"); total += sizeof(pair_s); obj->pair.type = TYPE_PAIR; CAR(obj) = car; CDR(obj) = cdr; return obj; } static obj_t make_integer(long integer) { obj_t obj = (obj_t)malloc(sizeof(integer_s)); if(obj == NULL) error("out of memory"); total += sizeof(integer_s); obj->integer.type = TYPE_INTEGER; obj->integer.integer = integer; return obj; } static obj_t make_symbol(size_t length, char string[]) { size_t size = offsetof(symbol_s, string) + length+1; obj_t obj = (obj_t)malloc(size); if(obj == NULL) error("out of memory"); total += size; obj->symbol.type = TYPE_SYMBOL; obj->symbol.length = length; memcpy(obj->symbol.string, string, length+1); return obj; } static obj_t make_string(size_t length, char string[]) { size_t size = offsetof(string_s, string) + length+1; obj_t obj = (obj_t)malloc(size); if(obj == NULL) error("out of memory"); total += size; obj->string.type = TYPE_STRING; obj->string.length = length; if (string) memcpy(obj->string.string, string, length+1); else memset(obj->string.string, 0, length+1); return obj; } static obj_t make_special(char *string) { obj_t obj = (obj_t)malloc(sizeof(special_s)); if(obj == NULL) error("out of memory"); total += sizeof(special_s); obj->special.type = TYPE_SPECIAL; obj->special.name = string; return obj; } static obj_t make_operator(char *name, entry_t entry, obj_t arguments, obj_t body, obj_t env, obj_t op_env) { obj_t obj = (obj_t)malloc(sizeof(operator_s)); if(obj == NULL) error("out of memory"); total += sizeof(operator_s); obj->operator.type = TYPE_OPERATOR; obj->operator.name = name; obj->operator.entry = entry; obj->operator.arguments = arguments; obj->operator.body = body; obj->operator.env = env; obj->operator.op_env = op_env; return obj; } static obj_t make_port(obj_t name, FILE *stream) { obj_t obj = (obj_t)malloc(sizeof(port_s)); if(obj == NULL) error("out of memory"); total += sizeof(port_s); obj->port.type = TYPE_PORT; obj->port.name = name; obj->port.stream = stream; return obj; } static obj_t make_character(char c) { obj_t obj = (obj_t)malloc(sizeof(character_s)); if(obj == NULL) error("out of memory"); total += sizeof(character_s); obj->character.type = TYPE_CHARACTER; obj->character.c = c; return obj; } static obj_t make_vector(size_t length, obj_t fill) { size_t size = offsetof(vector_s, vector) + length * sizeof(obj_t); size_t i; obj_t obj = (obj_t)malloc(size); if(obj == NULL) error("out of memory"); total += size; obj->vector.type = TYPE_VECTOR; obj->vector.length = length; for(i = 0; i < length; ++i) obj->vector.vector[i] = fill; return obj; } static obj_t make_buckets(size_t length) { size_t i, size = offsetof(buckets_s, bucket) + length * 2 * sizeof(obj_t); obj_t obj = (obj_t)malloc(size); if(obj == NULL) error("out of memory"); total += size; obj->buckets.type = TYPE_BUCKETS; obj->buckets.length = length; obj->buckets.used = 0; obj->buckets.deleted = 0; for(i = 0; i < length; ++i) { obj->buckets.bucket[i].key = NULL; obj->buckets.bucket[i].value = NULL; } return obj; } static obj_t make_table(size_t length, hash_t hashf, cmp_t cmpf) { size_t l, size = sizeof(table_s); obj_t obj = (obj_t)malloc(size); if(obj == NULL) error("out of memory"); total += size; obj->table.type = TYPE_TABLE; obj->table.hash = hashf; obj->table.cmp = cmpf; /* round up to next power of 2 */ for(l = 1; l < length; l *= 2); obj->table.buckets = make_buckets(l); return obj; } /* getnbc -- get next non-blank char from stream */ static int getnbc(FILE *stream) { int c; do { c = getc(stream); if(c == ';') { do c = getc(stream); while(c != EOF && c != '\n'); } } while(isspace(c)); return c; } /* isealpha -- test for "extended alphabetic" char * * Scheme symbols may contain any "extended alphabetic" * character (see section 2.1 of R4RS). This function * returns non-zero if a character is in the set of * extended characters. */ static int isealpha(int c) { return strchr("+-.*/<=>!?:$%_&~^", c) != NULL; } /* hash -- hash a string to an unsigned long * * This hash function was derived (with permission) from * Paul Haahr's hash in the most excellent rc 1.4. */ static unsigned long hash(const char *s, size_t length) { char c; unsigned long h=0; size_t i = 0; switch(length % 4) { do { c=s[i++]; h+=(c<<17)^(c<<11)^(c<<5)^(c>>1); case 3: c=s[i++]; h^=(c<<14)+(c<<7)+(c<<4)+c; case 2: c=s[i++]; h^=(~c<<11)|((c<<3)^(c>>1)); case 1: c=s[i++]; h-=(c<<16)|(c<<9)|(c<<2)|(c&3); case 0: ; } while(i < length); } return h; } /* find -- find entry for symbol in symbol table * * Look for a symbol matching the string in the symbol table. * If the symbol was found, returns the address of the symbol * table entry which points to the symbol. Otherwise it * either returns the address of a NULL entry into which the * new symbol should be inserted, or NULL if the symbol table * is full. */ static obj_t *find(char *string) { unsigned long i, h, probe; h = hash(string, strlen(string)); probe = (h >> 8) | 1; h &= (symtab_size-1); i = h; do { if(symtab[i] == NULL || strcmp(string, symtab[i]->symbol.string) == 0) return &symtab[i]; i = (i+probe) & (symtab_size-1); } while(i != h); return NULL; } /* rehash -- double size of symbol table */ static void rehash(void) { obj_t *old_symtab = symtab; unsigned old_symtab_size = symtab_size; unsigned i; symtab_size *= 2; symtab = malloc(sizeof(obj_t) * symtab_size); if(symtab == NULL) error("out of memory"); /* Initialize the new table to NULL so that "find" will work. */ for(i = 0; i < symtab_size; ++i) symtab[i] = NULL; for(i = 0; i < old_symtab_size; ++i) if(old_symtab[i] != NULL) { obj_t *where = find(old_symtab[i]->symbol.string); assert(where != NULL); /* new table shouldn't be full */ assert(*where == NULL); /* shouldn't be in new table */ *where = old_symtab[i]; } free(old_symtab); } /* union-find string in symbol table, rehashing if necessary */ static obj_t intern(char *string) { obj_t *where; where = find(string); if(where == NULL) { rehash(); where = find(string); assert(where != NULL); /* shouldn't be full after rehash */ } if(*where == NULL) /* symbol not found in table */ *where = make_symbol(strlen(string), string); return *where; } /* Hash table implementation */ static unsigned long eq_hash(obj_t obj) { union {char s[sizeof(obj_t)]; obj_t addr;} u = {""}; u.addr = obj; return hash(u.s, sizeof(obj_t)); } static int eqp(obj_t obj1, obj_t obj2) { return obj1 == obj2; } static unsigned long eqv_hash(obj_t obj) { switch(TYPE(obj)) { case TYPE_INTEGER: return obj->integer.integer; case TYPE_CHARACTER: return obj->character.c; default: return eq_hash(obj); } } static int eqvp(obj_t obj1, obj_t obj2) { if (obj1 == obj2) return 1; if (TYPE(obj1) != TYPE(obj2)) return 0; switch(TYPE(obj1)) { case TYPE_INTEGER: return obj1->integer.integer == obj2->integer.integer; case TYPE_CHARACTER: return obj1->character.c == obj2->character.c; default: return 0; } } static unsigned long string_hash(obj_t obj) { unless(TYPE(obj) == TYPE_STRING) error("string-hash: argument must be a string"); return hash(obj->string.string, obj->string.length); } static int string_equalp(obj_t obj1, obj_t obj2) { return obj1 == obj2 || (TYPE(obj1) == TYPE_STRING && TYPE(obj2) == TYPE_STRING && obj1->string.length == obj2->string.length && 0 == strcmp(obj1->string.string, obj2->string.string)); } static struct bucket_s *buckets_find(obj_t tbl, obj_t buckets, obj_t key) { unsigned long i, h, probe; struct bucket_s *result = NULL; assert(TYPE(tbl) == TYPE_TABLE); assert(TYPE(buckets) == TYPE_BUCKETS); h = tbl->table.hash(key); probe = (h >> 8) | 1; h &= (buckets->buckets.length-1); i = h; do { struct bucket_s *b = &buckets->buckets.bucket[i]; if(b->key == NULL || tbl->table.cmp(b->key, key)) return b; if(result == NULL && b->key == obj_deleted) result = b; i = (i+probe) & (buckets->buckets.length-1); } while(i != h); return result; } static size_t table_size(obj_t tbl) { size_t used, deleted; assert(TYPE(tbl) == TYPE_TABLE); used = tbl->table.buckets->buckets.used; deleted = tbl->table.buckets->buckets.deleted; assert(used >= deleted); return used - deleted; } static void table_rehash(obj_t tbl) { size_t i, old_length, new_length; obj_t new_buckets; assert(TYPE(tbl) == TYPE_TABLE); old_length = tbl->table.buckets->buckets.length; new_length = old_length * 2; new_buckets = make_buckets(new_length); for (i = 0; i < old_length; ++i) { struct bucket_s *old_b = &tbl->table.buckets->buckets.bucket[i]; if (old_b->key != NULL && old_b->key != obj_deleted) { struct bucket_s *b = buckets_find(tbl, new_buckets, old_b->key); assert(b != NULL); /* new table shouldn't be full */ assert(b->key == NULL); /* shouldn't be in new table */ *b = *old_b; ++ new_buckets->buckets.used; } } assert(new_buckets->buckets.used == table_size(tbl)); tbl->table.buckets = new_buckets; } static obj_t table_ref(obj_t tbl, obj_t key) { struct bucket_s *b; assert(TYPE(tbl) == TYPE_TABLE); b = buckets_find(tbl, tbl->table.buckets, key); if (b && b->key != NULL && b->key != obj_deleted) return b->value; return NULL; } static int table_full(obj_t tbl) { assert(TYPE(tbl) == TYPE_TABLE); return tbl->table.buckets->buckets.used >= tbl->table.buckets->buckets.length / 2; } static void table_set(obj_t tbl, obj_t key, obj_t value) { struct bucket_s *b; assert(TYPE(tbl) == TYPE_TABLE); if (table_full(tbl) || (b = buckets_find(tbl, tbl->table.buckets, key)) == NULL) { table_rehash(tbl); b = buckets_find(tbl, tbl->table.buckets, key); assert(b != NULL); /* shouldn't be full after rehash */ } if (b->key == NULL) { b->key = key; ++ tbl->table.buckets->buckets.used; } else if (b->key == obj_deleted) { b->key = key; assert(tbl->table.buckets->buckets.deleted > 0); -- tbl->table.buckets->buckets.deleted; } b->value = value; } static void table_delete(obj_t tbl, obj_t key) { struct bucket_s *b; assert(TYPE(tbl) == TYPE_TABLE); b = buckets_find(tbl, tbl->table.buckets, key); if (b != NULL && b->key != NULL) { b->key = obj_deleted; ++ tbl->table.buckets->buckets.deleted; } } static void print(obj_t obj, unsigned depth, FILE *stream) { switch(TYPE(obj)) { case TYPE_INTEGER: { fprintf(stream, "%ld", obj->integer.integer); } break; case TYPE_SYMBOL: { fputs(obj->symbol.string, stream); } break; case TYPE_SPECIAL: { fputs(obj->special.name, stream); } break; case TYPE_PORT: { assert(TYPE(obj->port.name) == TYPE_STRING); fprintf(stream, "#[port \"%s\"]", obj->port.name->string.string); } break; case TYPE_STRING: { size_t i; putc('"', stream); for(i = 0; i < obj->string.length; ++i) { char c = obj->string.string[i]; switch(c) { case '\\': fputs("\\\\", stream); break; case '"': fputs("\\\"", stream); break; default: putc(c, stream); break; } } putc('"', stream); } break; case TYPE_PROMISE: { assert(CAR(obj) == obj_true || CAR(obj) == obj_false); fprintf(stream, "#[%sevaluated promise ", CAR(obj) == obj_false ? "un" : ""); print(CDR(obj), depth - 1, stream); putc(']', stream); } break; case TYPE_PAIR: { if(TYPE(CAR(obj)) == TYPE_SYMBOL && TYPE(CDR(obj)) == TYPE_PAIR && CDDR(obj) == obj_empty) { if(CAR(obj) == obj_quote) { putc('\'', stream); if(depth == 0) fputs("...", stream); else print(CADR(obj), depth - 1, stream); break; } if(CAR(obj) == obj_quasiquote) { putc('`', stream); if(depth == 0) fputs("...", stream); else print(CADR(obj), depth - 1, stream); break; } if(CAR(obj) == obj_unquote) { putc(',', stream); if(depth == 0) fputs("...", stream); else print(CADR(obj), depth - 1, stream); break; } if(CAR(obj) == obj_unquote_splic) { fputs(",@", stream); if(depth == 0) fputs("...", stream); else print(CADR(obj), depth - 1, stream); break; } } putc('(', stream); if(depth == 0) fputs("...", stream); else { for(;;) { print(CAR(obj), depth - 1, stream); obj = CDR(obj); if(TYPE(obj) != TYPE_PAIR) break; putc(' ', stream); } if(obj != obj_empty) { fputs(" . ", stream); print(obj, depth - 1, stream); } } putc(')', stream); } break; case TYPE_VECTOR: { fputs("#(", stream); if(depth == 0) fputs("...", stream); else { size_t i; for(i = 0; i < obj->vector.length; ++i) { if(i > 0) putc(' ', stream); print(obj->vector.vector[i], depth - 1, stream); } } putc(')', stream); } break; case TYPE_BUCKETS: { size_t i; for(i = 0; i < obj->buckets.length; ++i) { struct bucket_s *b = &obj->buckets.bucket[i]; if(b->key != NULL && b->key != obj_deleted) { fputs(" (", stream); print(b->key, depth - 1, stream); putc(' ', stream); print(b->value, depth - 1, stream); putc(')', stream); } } } break; case TYPE_TABLE: { fputs("#[hashtable", stream); print(obj->table.buckets, depth - 1, stream); putc(']', stream); } break; case TYPE_OPERATOR: { fprintf(stream, "#[operator \"%s\" %p ", obj->operator.name, (void *)obj); if(depth == 0) fputs("...", stream); else { print(obj->operator.arguments, depth - 1, stream); putc(' ', stream); print(obj->operator.body, depth - 1, stream); putc(' ', stream); print(obj->operator.env, depth - 1, stream); putc(' ', stream); print(obj->operator.op_env, depth - 1, stream); } putc(']', stream); } break; case TYPE_CHARACTER: { fprintf(stream, "#\\%c", obj->character.c); } break; default: assert(0); abort(); } } static obj_t read_integer(FILE *stream, int c) { long integer = 0; do { integer = integer*10 + c-'0'; c = getc(stream); } while(isdigit(c)); ungetc(c, stream); return make_integer(integer); } static obj_t read_symbol(FILE *stream, int c) { int length = 0; char string[SYMMAX+1]; do { string[length++] = tolower(c); c = getc(stream); } while(length < SYMMAX && (isalnum(c) || isealpha(c))); if(isalnum(c) || isealpha(c)) error("read: symbol too long"); string[length] = '\0'; ungetc(c, stream); return intern(string); } static obj_t read_string(FILE *stream, int c) { int length = 0; char string[STRMAX+1]; for(;;) { c = getc(stream); if(c == EOF) error("read: end of file during string"); if(c == '"') break; if(length >= STRMAX) error("read: string too long"); if(c == '\\') { c = getc(stream); switch(c) { case '\\': break; case '"': break; case 'n': c = '\n'; break; case 't': c = '\t'; break; case EOF: error("read: end of file in escape sequence in string"); default: error("read: unknown escape '%c'", c); } } string[length++] = c; } string[length] = '\0'; return make_string(length, string); } static obj_t read(FILE *stream); static obj_t read_quote(FILE *stream, int c) { return make_pair(obj_quote, make_pair(read(stream), obj_empty)); } static obj_t read_quasiquote(FILE *stream, int c) { return make_pair(obj_quasiquote, make_pair(read(stream), obj_empty)); } static obj_t read_unquote(FILE *stream, int c) { c = getc(stream); if(c == '@') return make_pair(obj_unquote_splic, make_pair(read(stream), obj_empty)); ungetc(c, stream); return make_pair(obj_unquote, make_pair(read(stream), obj_empty)); } static obj_t read_list(FILE *stream, int c) { obj_t list, new, end; list = obj_empty; end = NULL; /* suppress "uninitialized" warning in GCC */ for(;;) { c = getnbc(stream); if(c == ')' || c == '.' || c == EOF) break; ungetc(c, stream); new = make_pair(read(stream), obj_empty); if(list == obj_empty) { list = new; end = new; } else { CDR(end) = new; end = new; } } if(c == '.') { if(list == obj_empty) error("read: unexpected dot"); CDR(end) = read(stream); c = getnbc(stream); } if(c != ')') error("read: expected close parenthesis"); return list; } static obj_t list_to_vector(obj_t list) { size_t i; obj_t l, vector; i = 0; l = list; while(TYPE(l) == TYPE_PAIR) { ++i; l = CDR(l); } if(l != obj_empty) return obj_error; vector = make_vector(i, obj_undefined); i = 0; l = list; while(TYPE(l) == TYPE_PAIR) { vector->vector.vector[i] = CAR(l); ++i; l = CDR(l); } return vector; } static obj_t read_special(FILE *stream, int c) { c = getnbc(stream); switch(tolower(c)) { case 't': return obj_true; case 'f': return obj_false; case '\\': { /* character (R4RS 6.6) */ c = getc(stream); if(c == EOF) error("read: end of file reading character literal"); return make_character(c); } case '(': { /* vector (R4RS 6.8) */ obj_t list = read_list(stream, c); obj_t vector = list_to_vector(list); if(vector == obj_error) error("read: illegal vector syntax"); return vector; } } error("read: unknown special '%c'", c); return obj_error; } static obj_t read(FILE *stream) { int c; c = getnbc(stream); if(c == EOF) return obj_eof; if(isdigit(c)) return read_integer(stream, c); switch(c) { case '\'': return read_quote(stream, c); case '`': return read_quasiquote(stream, c); case ',': return read_unquote(stream, c); case '(': return read_list(stream, c); case '#': return read_special(stream, c); case '"': return read_string(stream, c); case '-': case '+': { int next = getc(stream); if(isdigit(next)) { obj_t integer = read_integer(stream, next); if(c == '-') integer->integer.integer = -integer->integer.integer; return integer; } ungetc(next, stream); } break; /* fall through to read as symbol */ } if(isalpha(c) || isealpha(c)) return read_symbol(stream, c); error("read: illegal char '%c'", c); return obj_error; } /* lookup_in_frame -- look up a symbol in single frame * * Search a single frame of the environment for a symbol binding. */ static obj_t lookup_in_frame(obj_t frame, obj_t symbol) { while(frame != obj_empty) { assert(TYPE(frame) == TYPE_PAIR); assert(TYPE(CAR(frame)) == TYPE_PAIR); assert(TYPE(CAAR(frame)) == TYPE_SYMBOL); if(CAAR(frame) == symbol) return CAR(frame); frame = CDR(frame); } return obj_undefined; } /* lookup -- look up symbol in environment * * Search an entire environment for a binding of a symbol. */ static obj_t lookup(obj_t env, obj_t symbol) { obj_t binding; while(env != obj_empty) { assert(TYPE(env) == TYPE_PAIR); binding = lookup_in_frame(CAR(env), symbol); if(binding != obj_undefined) return binding; env = CDR(env); } return obj_undefined; } /* define -- define symbol in environment * * In Scheme, define will actually rebind (i.e. set) a symbol in the * same frame of the environment, or add a binding if it wasn't already * set. This has the effect of making bindings local to functions * (see how entry_interpret adds an empty frame to the environments), * allowing recursion, and allowing redefinition at the top level. * See R4R2 section 5.2 for details. */ static void define(obj_t env, obj_t symbol, obj_t value) { obj_t binding; assert(TYPE(env) == TYPE_PAIR); /* always at least one frame */ binding = lookup_in_frame(CAR(env), symbol); if(binding != obj_undefined) CDR(binding) = value; else CAR(env) = make_pair(make_pair(symbol, value), CAR(env)); } static obj_t eval(obj_t env, obj_t op_env, obj_t exp); static obj_t eval(obj_t env, obj_t op_env, obj_t exp) { for(;;) { obj_t operator; obj_t result; /* self-evaluating */ if(TYPE(exp) == TYPE_INTEGER || (TYPE(exp) == TYPE_SPECIAL && exp != obj_empty) || TYPE(exp) == TYPE_STRING || TYPE(exp) == TYPE_CHARACTER || TYPE(exp) == TYPE_OPERATOR) return exp; /* symbol lookup */ if(TYPE(exp) == TYPE_SYMBOL) { obj_t binding = lookup(env, exp); if(binding == obj_undefined) error("eval: unbound symbol \"%s\"", exp->symbol.string); return CDR(binding); } if(TYPE(exp) != TYPE_PAIR) { error("eval: unknown syntax"); return obj_error; } /* apply operator or function */ if(TYPE(CAR(exp)) == TYPE_SYMBOL) { obj_t binding = lookup(op_env, CAR(exp)); if(binding != obj_undefined) { operator = CDR(binding); assert(TYPE(operator) == TYPE_OPERATOR); result = (*operator->operator.entry)(env, op_env, operator, CDR(exp)); goto found; } } operator = eval(env, op_env, CAR(exp)); unless(TYPE(operator) == TYPE_OPERATOR) error("eval: application of non-function"); result = (*operator->operator.entry)(env, op_env, operator, CDR(exp)); found: if (!(TYPE(result) == TYPE_PAIR && CAR(result) == obj_tail)) return result; env = CADR(result); op_env = CADDR(result); exp = CAR(CDDDR(result)); } } static obj_t load(obj_t env, obj_t op_env, const char *filename) { obj_t result = obj_undefined; FILE *stream = fopen(filename, "r"); if(stream == NULL) error("load: cannot open %s: %s", filename, strerror(errno)); for(;;) { obj_t obj = read(stream); if(obj == obj_eof) break; result = eval(env, op_env, obj); } /* TODO: if there was an error, this doesn't get closed */ fclose(stream); return result; } /* OPERATOR UTILITIES */ /* eval_list -- evaluate list of expressions giving list of results * * eval_list evaluates a list of expressions and yields a list of their * results, in order. If the list is badly formed, an error is thrown * using the message given. */ static obj_t eval_list(obj_t env, obj_t op_env, obj_t list, char *message) { obj_t result, end, pair; result = obj_empty; end = NULL; /* suppress "uninitialized" warning in GCC */ while(list != obj_empty) { if(TYPE(list) != TYPE_PAIR) error(message); pair = make_pair(eval(env, op_env, CAR(list)), obj_empty); if(result == obj_empty) result = pair; else CDR(end) = pair; end = pair; list = CDR(list); } return result; } /* eval_args1 -- evaluate some operator arguments * * See eval_args and eval_args_rest for usage. */ static obj_t eval_args1(char *name, obj_t env, obj_t op_env, obj_t operands, unsigned n, va_list args) { unsigned i; for(i = 0; i < n; ++i) { unless(TYPE(operands) == TYPE_PAIR) error("eval: too few arguments to %s", name); *va_arg(args, obj_t *) = eval(env, op_env, CAR(operands)); operands = CDR(operands); } return operands; } /* eval_args -- evaluate operator arguments without rest list * * eval_args evaluates the first "n" expressions from the list of * expressions in "operands", returning the rest of the operands * unevaluated. It puts the results of evaluation in the addresses * passed in the vararg list. If the operands list is badly formed * an error is thrown using the operator name passed. For example: * * eval_args("foo", env, op_env, operands, 2, &arg1, &arg2); */ static void eval_args(char *name, obj_t env, obj_t op_env, obj_t operands, unsigned n, ...) { va_list args; va_start(args, n); operands = eval_args1(name, env, op_env, operands, n, args); unless(operands == obj_empty) error("eval: too many arguments to %s", name); va_end(args); } /* eval_args_rest -- evaluate operator arguments with rest list * * eval_args_rest evaluates the first "n" expressions from the list of * expressions in "operands", then evaluates the rest of the operands * using eval_list and puts the result at *restp. It puts the results * of evaluating the first "n" operands in the addresses * passed in the vararg list. If the operands list is badly formed * an error is thrown using the operator name passed. For example: * * eval_args_rest("foo", env, op_env, operands, &rest, 2, &arg1, &arg2); */ static void eval_args_rest(char *name, obj_t env, obj_t op_env, obj_t operands, obj_t *restp, unsigned n, ...) { va_list args; va_start(args, n); operands = eval_args1(name, env, op_env, operands, n, args); va_end(args); *restp = eval_list(env, op_env, operands, "eval: badly formed argument list"); } /* eval_tail -- return an object that will cause eval to loop * * Rather than calling `eval` an operator can return a special object that * causes a calling `eval` to loop, avoiding using up a C stack frame. * This implements tail recursion (in a simple way). */ static obj_t eval_tail(obj_t env, obj_t op_env, obj_t exp) { return make_pair(obj_tail, make_pair(env, make_pair(op_env, make_pair(exp, obj_empty)))); } /* eval_body -- evaluate a list of expressions, returning last result * * This is used for the bodies of forms such as let, begin, etc. where * a list of expressions is allowed. */ static obj_t eval_body(obj_t env, obj_t op_env, obj_t operator, obj_t body) { for (;;) { if (TYPE(body) != TYPE_PAIR) error("%s: illegal expression list", operator->operator.name); if (CDR(body) == obj_empty) return eval_tail(env, op_env, CAR(body)); (void)eval(env, op_env, CAR(body)); body = CDR(body); } } /* BUILT-IN OPERATORS */ /* entry_interpret -- interpreted function entry point * * When a function is made using lambda (see entry_lambda) an operator * is created with entry_interpret as its entry point, and the arguments * and body of the function. The entry_interpret function evaluates * the operands of the function and binds them to the argument names * in a new frame added to the lambda's closure environment. It then * evaluates the body in that environment, executing the function. */ static obj_t entry_interpret(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { obj_t arguments, fun_env, fun_op_env; assert(TYPE(operator) == TYPE_OPERATOR); /* Make a new frame so that bindings are local to the function. */ /* Arguments will be bound in this new frame. */ fun_env = make_pair(obj_empty, operator->operator.env); fun_op_env = make_pair(obj_empty, operator->operator.op_env); arguments = operator->operator.arguments; while(operands != obj_empty) { if(arguments == obj_empty) error("eval: function applied to too many arguments"); if(TYPE(arguments) == TYPE_SYMBOL) { define(fun_env, arguments, eval_list(env, op_env, operands, "eval: badly formed argument list")); operands = obj_empty; arguments = obj_empty; } else { assert(TYPE(arguments) == TYPE_PAIR && TYPE(CAR(arguments)) == TYPE_SYMBOL); define(fun_env, CAR(arguments), eval(env, op_env, CAR(operands))); operands = CDR(operands); arguments = CDR(arguments); } } if(arguments != obj_empty) error("eval: function applied to too few arguments"); return eval_tail(fun_env, fun_op_env, operator->operator.body); } /* entry_quote -- return operands unevaluated * * In Scheme, (quote foo) evaluates to foo (i.e. foo is not evaluated). * See R4RS 4.1.2. The reader expands "'x" to "(quote x)". */ static obj_t entry_quote(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { unless(TYPE(operands) == TYPE_PAIR && CDR(operands) == obj_empty) error("%s: illegal syntax", operator->operator.name); return CAR(operands); } /* entry_define -- bind a symbol in the top frame of the environment * * In Scheme, "(define )" evaluates expressions * and binds it to symbol in the top frame of the environment (see * R4RS 5.2). This code also allows the non-essential syntax for * define, "(define ( ) )" as a short-hand for * "(define (lambda () ))". */ static obj_t entry_define(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { obj_t symbol = NULL, value = NULL; unless(TYPE(operands) == TYPE_PAIR && TYPE(CDR(operands)) == TYPE_PAIR) error("%s: illegal syntax", operator->operator.name); if(TYPE(CAR(operands)) == TYPE_SYMBOL) { unless(CDDR(operands) == obj_empty) error("%s: too many arguments", operator->operator.name); symbol = CAR(operands); value = eval(env, op_env, CADR(operands)); } else if(TYPE(CAR(operands)) == TYPE_PAIR && TYPE(CAAR(operands)) == TYPE_SYMBOL) { symbol = CAAR(operands); value = eval(env, op_env, make_pair(obj_lambda, make_pair(CDAR(operands), CDR(operands)))); } else error("%s: applied to binder", operator->operator.name); define(env, symbol, value); return symbol; } /* entry_if -- one- or two-armed conditional * * "(if )" and "(if )". * See R4RS 4.1.5. */ static obj_t entry_if(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { obj_t test; unless(TYPE(operands) == TYPE_PAIR && TYPE(CDR(operands)) == TYPE_PAIR && (CDDR(operands) == obj_empty || (TYPE(CDDR(operands)) == TYPE_PAIR && CDDDR(operands) == obj_empty))) error("%s: illegal syntax", operator->operator.name); test = eval(env, op_env, CAR(operands)); /* Anything which is not #f counts as true [R4RS 6.1]. */ if(test != obj_false) return eval_tail(env, op_env, CADR(operands)); if(TYPE(CDDR(operands)) == TYPE_PAIR) return eval_tail(env, op_env, CADDR(operands)); return obj_undefined; } /* entry_cond -- general conditional * * "(cond ( ...) ( ...) ... [(else ...)])" */ static obj_t entry_cond(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { unless(TYPE(operands) == TYPE_PAIR) error("%s: illegal syntax", operator->operator.name); while(TYPE(operands) == TYPE_PAIR) { obj_t clause = CAR(operands); obj_t result; unless(TYPE(clause) == TYPE_PAIR && TYPE(CDR(clause)) == TYPE_PAIR) error("%s: illegal clause syntax", operator->operator.name); if(CAR(clause) == obj_else) { unless(CDR(operands) == obj_empty) error("%s: else clause must come last", operator->operator.name); result = obj_true; } else result = eval(env, op_env, CAR(clause)); if(result != obj_false) { if (CDR(clause) == obj_empty) return result; return eval_body(env, op_env, operator, CDR(clause)); } operands = CDR(operands); } return obj_undefined; } /* entry_and -- (and ...) */ static obj_t entry_and(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { obj_t test; if (operands == obj_empty) return obj_true; do { if (TYPE(operands) != TYPE_PAIR) error("%s: illegal syntax", operator->operator.name); if (CDR(operands) == obj_empty) return eval_tail(env, op_env, CAR(operands)); test = eval(env, op_env, CAR(operands)); operands = CDR(operands); } while (test != obj_false); return test; } /* entry_or -- (or ...) */ static obj_t entry_or(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { obj_t test; if (operands == obj_empty) return obj_false; do { if (TYPE(operands) != TYPE_PAIR) error("%s: illegal syntax", operator->operator.name); if (CDR(operands) == obj_empty) return eval_tail(env, op_env, CAR(operands)); test = eval(env, op_env, CAR(operands)); operands = CDR(operands); } while (test == obj_false); return test; } /* entry_let -- (let ) */ /* TODO: Too much common code with let* */ static obj_t entry_let(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { obj_t inner_env, bindings; unless(TYPE(operands) == TYPE_PAIR && TYPE(CDR(operands)) == TYPE_PAIR) error("%s: illegal syntax", operator->operator.name); inner_env = make_pair(obj_empty, env); /* TODO: common with interpret */ bindings = CAR(operands); while(TYPE(bindings) == TYPE_PAIR) { obj_t binding = CAR(bindings); unless(TYPE(binding) == TYPE_PAIR && TYPE(CAR(binding)) == TYPE_SYMBOL && TYPE(CDR(binding)) == TYPE_PAIR && CDDR(binding) == obj_empty) error("%s: illegal binding", operator->operator.name); define(inner_env, CAR(binding), eval(env, op_env, CADR(binding))); bindings = CDR(bindings); } if(bindings != obj_empty) error("%s: illegal bindings list", operator->operator.name); return eval_body(inner_env, op_env, operator, CDR(operands)); } /* entry_let_star -- (let* ) */ /* TODO: Too much common code with let */ static obj_t entry_let_star(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { obj_t inner_env, bindings; unless(TYPE(operands) == TYPE_PAIR && TYPE(CDR(operands)) == TYPE_PAIR) error("%s: illegal syntax", operator->operator.name); inner_env = make_pair(obj_empty, env); /* TODO: common with interpret */ bindings = CAR(operands); while(TYPE(bindings) == TYPE_PAIR) { obj_t binding = CAR(bindings); unless(TYPE(binding) == TYPE_PAIR && TYPE(CAR(binding)) == TYPE_SYMBOL && TYPE(CDR(binding)) == TYPE_PAIR && CDDR(binding) == obj_empty) error("%s: illegal binding", operator->operator.name); define(inner_env, CAR(binding), eval(inner_env, op_env, CADR(binding))); bindings = CDR(bindings); } if(bindings != obj_empty) error("%s: illegal bindings list", operator->operator.name); return eval_body(inner_env, op_env, operator, CDR(operands)); } /* entry_letrec -- (letrec ) */ /* TODO: Too much common code with let and let* */ static obj_t entry_letrec(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { obj_t inner_env, bindings; unless(TYPE(operands) == TYPE_PAIR && TYPE(CDR(operands)) == TYPE_PAIR) error("%s: illegal syntax", operator->operator.name); inner_env = make_pair(obj_empty, env); /* TODO: common with interpret */ bindings = CAR(operands); while(TYPE(bindings) == TYPE_PAIR) { obj_t binding = CAR(bindings); unless(TYPE(binding) == TYPE_PAIR && TYPE(CAR(binding)) == TYPE_SYMBOL && TYPE(CDR(binding)) == TYPE_PAIR && CDDR(binding) == obj_empty) error("%s: illegal binding", operator->operator.name); define(inner_env, CAR(binding), obj_undefined); bindings = CDR(bindings); } if(bindings != obj_empty) error("%s: illegal bindings list", operator->operator.name); bindings = CAR(operands); while(TYPE(bindings) == TYPE_PAIR) { obj_t binding = CAR(bindings); define(inner_env, CAR(binding), eval(inner_env, op_env, CADR(binding))); bindings = CDR(bindings); } return eval_body(inner_env, op_env, operator, CDR(operands)); } /* entry_do -- (do (( ) ...) ( ...) ...) * Do is an iteration construct. It specifies a set of variables to be * bound, how they are to be initialized at the start, and how they * are to be updated on each iteration. When a termination condition * is met, the loop exits with a specified result value. * See R4RS 4.2.4. */ static obj_t entry_do(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { obj_t inner_env, next_env, bindings; unless(TYPE(operands) == TYPE_PAIR && TYPE(CDR(operands)) == TYPE_PAIR && TYPE(CADR(operands)) == TYPE_PAIR) error("%s: illegal syntax", operator->operator.name); inner_env = make_pair(obj_empty, env); /* Do expressions are evaluated as follows: The expressions are evaluated (in some unspecified order), the s are bound to fresh locations, the results of the expressions are stored in the bindings of the s, and then the iteration phase begins. */ bindings = CAR(operands); while(TYPE(bindings) == TYPE_PAIR) { obj_t binding = CAR(bindings); unless(TYPE(binding) == TYPE_PAIR && TYPE(CAR(binding)) == TYPE_SYMBOL && TYPE(CDR(binding)) == TYPE_PAIR && (CDDR(binding) == obj_empty || (TYPE(CDDR(binding)) == TYPE_PAIR && CDDDR(binding) == obj_empty))) error("%s: illegal binding", operator->operator.name); define(inner_env, CAR(binding), eval(env, op_env, CADR(binding))); bindings = CDR(bindings); } for(;;) { /* Each iteration begins by evaluating ; */ obj_t test = CADR(operands); if(eval(inner_env, op_env, CAR(test)) == obj_false) { /* if the result is false (see section see section 6.1 Booleans), then the expressions are evaluated in order for effect, */ obj_t commands = CDDR(operands); while(TYPE(commands) == TYPE_PAIR) { eval(inner_env, op_env, CAR(commands)); commands = CDR(commands); } unless(commands == obj_empty) error("%s: illegal syntax", operator->operator.name); /* the expressions are evaluated in some unspecified order, the s are bound to fresh locations, the results of the s are stored in the bindings of the s, and the next iteration begins. */ bindings = CAR(operands); next_env = make_pair(obj_empty, inner_env); while(TYPE(bindings) == TYPE_PAIR) { obj_t binding = CAR(bindings); unless(CDDR(binding) == obj_empty) define(next_env, CAR(binding), eval(inner_env, op_env, CADDR(binding))); bindings = CDR(bindings); } inner_env = next_env; } else { /* If evaluates to a true value, then the s are evaluated from left to right and the value of the last is returned as the value of the do expression. If no s are present, then the value of the do expression is unspecified. */ obj_t result = obj_undefined; test = CDR(test); while(TYPE(test) == TYPE_PAIR) { result = eval(inner_env, op_env, CAR(test)); test = CDR(test); } unless(test == obj_empty) error("%s: illegal syntax", operator->operator.name); return result; } } } /* entry_delay -- (delay ) */ static obj_t entry_delay(obj_t env, obj_t op_env, obj_t operator, obj_t operands) { obj_t promise; unless(TYPE(operands) == TYPE_PAIR && CDR(operands) == obj_empty) error("%s: illegal syntax", operator->operator.name); promise = make_pair(obj_false, make_operator("anonymous promise", entry_interpret, obj_empty, CAR(operands), env, op_env)); TYPE(promise) = TYPE_PROMISE; return promise; } static obj_t quasiquote(obj_t env, obj_t op_env, obj_t operator, obj_t arg) { obj_t result = obj_empty, end = NULL, insert; unless(TYPE(arg) == TYPE_PAIR) return arg; while(TYPE(arg) == TYPE_PAIR) { if(TYPE(CAR(arg)) == TYPE_PAIR && TYPE(CAAR(arg)) == TYPE_SYMBOL && (CAAR(arg) == obj_unquote || CAAR(arg) == obj_unquote_splic)) { unless(TYPE(CDAR(arg)) == TYPE_PAIR && CDDAR(arg) == obj_empty) error("%s: illegal %s syntax", operator->operator.name, CAAR(arg)->symbol.string); insert = eval(env, op_env, CADAR(arg)); if(CAAR(arg) == obj_unquote) { obj_t pair = make_pair(insert, obj_empty); if(result == obj_empty) result = pair; if(end) CDR(end) = pair; end = pair; } else if(CAAR(arg) == obj_unquote_splic) { while(TYPE(insert) == TYPE_PAIR) { obj_t pair = make_pair(CAR(insert), obj_empty); if(result == obj_empty) result = pair; if(end) CDR(end) = pair; end = pair; insert = CDR(insert); } if(insert != obj_empty) error("%s: %s expression must return list", operator->operator.name, CAAR(arg)->symbol.string); } } else { obj_t pair = make_pair(quasiquote(env, op_env, operator, CAR(arg)), obj_empty); if(result == obj_empty) result = pair; if(end) CDR(end) = pair; end = pair; } arg = CDR(arg); } return result; } /* entry_quasiquote -- (quasiquote