mirror of
https://gitlab.com/embeddable-common-lisp/ecl.git
synced 2025-12-05 18:30:24 -08:00
266 lines
11 KiB
Common Lisp
266 lines
11 KiB
Common Lisp
;;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: SYSTEM -*-
|
|
;;;;
|
|
;;;; Copyright (c) 2011, Juan Jose Garcia-Ripoll.
|
|
;;;;
|
|
;;;; This program is free software; you can redistribute it and/or
|
|
;;;; modify it under the terms of the GNU Library General Public
|
|
;;;; License as published by the Free Software Foundation; either
|
|
;;;; version 2 of the License, or (at your option) any later version.
|
|
;;;;
|
|
;;;; See file '../Copyright' for full details.
|
|
;;;;
|
|
;;;; This file implements the CDB specification, which you find here
|
|
;;;; http://cr.yp.to/cdb/cdb.txt and is reproduced below, replicating
|
|
;;;; the interface developed by Zach Beane under the name ZCDB.
|
|
;;;;
|
|
;;;; A structure for constant databases
|
|
;;;; 19960914
|
|
;;;; Copyright 1996
|
|
;;;; D. J. Bernstein, djb@pobox.com
|
|
;;;;
|
|
;;;; A cdb is an associative array: it maps strings (``keys'') to strings
|
|
;;;; (``data'').
|
|
;;;;
|
|
;;;; A cdb contains 256 pointers to linearly probed open hash tables. The
|
|
;;;; hash tables contain pointers to (key,data) pairs. A cdb is stored in
|
|
;;;; a single file on disk:
|
|
;;;;
|
|
;;;; +----------------+---------+-------+-------+-----+---------+
|
|
;;;; | p0 p1 ... p255 | records | hash0 | hash1 | ... | hash255 |
|
|
;;;; +----------------+---------+-------+-------+-----+---------+
|
|
;;;;
|
|
;;;; Each of the 256 initial pointers states a position and a length. The
|
|
;;;; position is the starting byte position of the hash table. The length
|
|
;;;; is the number of slots in the hash table.
|
|
;;;;
|
|
;;;; Records are stored sequentially, without special alignment. A record
|
|
;;;; states a key length, a data length, the key, and the data.
|
|
;;;;
|
|
;;;; Each hash table slot states a hash value and a byte position. If the
|
|
;;;; byte position is 0, the slot is empty. Otherwise, the slot points to
|
|
;;;; a record whose key has that hash value.
|
|
;;;;
|
|
;;;; Positions, lengths, and hash values are 32-bit quantities, stored in
|
|
;;;; little-endian form in 4 bytes. Thus a cdb must fit into 4 gigabytes.
|
|
;;;;
|
|
;;;; A record is located as follows. Compute the hash value of the key in
|
|
;;;; the record. The hash value modulo 256 is the number of a hash table.
|
|
;;;; The hash value divided by 256, modulo the length of that table, is a
|
|
;;;; slot number. Probe that slot, the next higher slot, and so on, until
|
|
;;;; you find the record or run into an empty slot.
|
|
;;;;
|
|
;;;; The cdb hash function is ``h = ((h << 5) + h) ^ c'', with a starting
|
|
;;;; hash of 5381.
|
|
;;;;
|
|
|
|
(defpackage "ECL-CDB"
|
|
(:use "CL")
|
|
(:export "WITH-OUTPUT-TO-CDB" "ADD-RECORD" "LOOKUP-CDB"))
|
|
|
|
(in-package "ECL-CDB")
|
|
|
|
(defstruct cdb
|
|
stream
|
|
pathname
|
|
word-buffer
|
|
tables
|
|
temporary-pathname)
|
|
|
|
(defun read-word (stream)
|
|
(logior (read-byte stream)
|
|
(ash (read-byte stream) 8)
|
|
(ash (read-byte stream) 16)
|
|
(ash (read-byte stream) 24)))
|
|
|
|
(defun write-word (byte stream)
|
|
(declare (type (unsigned-byte 32) byte)
|
|
(stream stream)
|
|
(optimize speed (safety 0)))
|
|
(write-byte (logand #xff byte) stream)
|
|
(write-byte (logand #xff (ash byte -8)) stream)
|
|
(write-byte (logand #xff (ash byte -16)) stream)
|
|
(write-byte (logand #xff (ash byte -24)) stream))
|
|
|
|
(defun write-vector (vector stream)
|
|
(declare (type (array (unsigned-byte 8) (*)) vector))
|
|
(loop for v across vector
|
|
do (write-word v stream)))
|
|
|
|
(defun to-cdb-hash (key-vector)
|
|
(declare (type (array (unsigned-byte 8) (*)) key-vector))
|
|
(loop with h of-type (unsigned-integer 32) = 5381
|
|
for byte of-type (unsigned-byte 8) across key-vector
|
|
do (setf h (logxor (logand #xffffffff
|
|
(+ (ash (logand #.(ash #xffffffff -5) h)
|
|
5)
|
|
h))
|
|
byte))
|
|
finally (return h)))
|
|
|
|
(defun %make-cdb (cdb-pathname temporary-pathname)
|
|
(let ((stream (open temporary-pathname
|
|
:direction :output
|
|
:if-exists :supersede
|
|
:if-does-not-exist :create
|
|
:element-type '(unsigned-byte 8))))
|
|
(if stream
|
|
(progn
|
|
(file-position stream 0)
|
|
(dotimes (i (* 256 2))
|
|
(write-word 0 stream))
|
|
(make-cdb :stream stream
|
|
:pathname cdb-pathname
|
|
:tables (make-array 256 :initial-element nil)
|
|
:temporary-pathname temporary-pathname))
|
|
(error "Unable to create CDB at filename ~A" temporary-pathname))))
|
|
|
|
(defmacro with-output-to-cdb ((cdb cdb-pathname temporary-pathname) &body body)
|
|
`(let (,cdb)
|
|
(unwind-protect
|
|
(progn
|
|
(setf ,cdb (%make-cdb ,cdb-pathname ,temporary-pathname))
|
|
,@body)
|
|
(close-cdb ,cdb))))
|
|
|
|
(defun add-record (key value cdb)
|
|
;; This routine dumps the data of this record, storing a small
|
|
;; reference in the CDB structure itself. This reference will be
|
|
;; used to create the hash.
|
|
(let* ((hash-key (to-cdb-hash key))
|
|
(table-index (logand #xff hash-key))
|
|
(stream (cdb-stream cdb)))
|
|
(push (cons hash-key (file-position stream))
|
|
(aref (cdb-tables cdb) table-index))
|
|
(write-word (length key) stream)
|
|
(write-word (length value) stream)
|
|
(write-sequence key stream)
|
|
(write-sequence value stream)))
|
|
|
|
(defun dump-table (table stream)
|
|
(declare (optimize speed (safety 0)))
|
|
;; TABLE is an association list of (HASH-KEY . FILE-POSITION)
|
|
;; We dump it at the end of the file. The length of the actual
|
|
;; file table can be a bit larger, to avoid many coincidences.
|
|
;; Here we use a factor 2.
|
|
(loop with length = (* 2 (length table))
|
|
with vector = (make-array (* 2 length) :initial-element 0
|
|
:element-type '(unsigned-byte 32))
|
|
for (hash-key . pos) in table
|
|
for index = (mod (ash hash-key -8) length)
|
|
do (loop for disp from 0 below length
|
|
for i = (* 2 (mod (+ disp index) length))
|
|
for record-pos = (aref vector (1+ i))
|
|
until (zerop record-pos)
|
|
finally (setf (aref vector i) hash-key (aref vector (1+ i)) pos))
|
|
finally (progn (write-vector vector stream)
|
|
(return length))))
|
|
|
|
(defun dump-cdb (cdb)
|
|
;; After we have dumped all the records in the file, we append the
|
|
;; hash tables and recreate the index table at the beginning.
|
|
(let* ((stream (cdb-stream cdb))
|
|
(index (make-array (* 2 256) :element-type '(unsigned-byte 32))))
|
|
(loop for table across (cdb-tables cdb)
|
|
for i of-type fixnum from 0 by 2
|
|
do (setf (aref index i) (file-position stream)
|
|
(aref index (1+ i)) (dump-table table stream)))
|
|
(file-position stream 0)
|
|
(write-vector index stream)))
|
|
|
|
(defun close-cdb (cdb)
|
|
(let ((stream (cdb-stream cdb)))
|
|
(when (open-stream-p stream)
|
|
(dump-cdb cdb)
|
|
(close stream)
|
|
(when (cdb-pathname cdb)
|
|
(rename-file (cdb-temporary-pathname cdb)
|
|
(cdb-pathname cdb))))))
|
|
|
|
(defun cdb-error (stream)
|
|
(error "Error when reading CDB database ~A" stream))
|
|
|
|
(defun values-coincide (position key-vector stream return-position-p)
|
|
(unless (file-position stream position)
|
|
(cdb-error stream))
|
|
(let ((key-length (read-word stream)))
|
|
(when (= key-length (length key-vector))
|
|
(let* ((value-length (read-word stream))
|
|
(other-key (make-array key-length :element-type '(unsigned-byte 8))))
|
|
(read-sequence other-key stream)
|
|
(when (equalp other-key key-vector)
|
|
(if return-position-p
|
|
(file-position stream)
|
|
(let ((value (make-array value-length :element-type '(unsigned-byte 8))))
|
|
(read-sequence value stream)
|
|
value)
|
|
))))))
|
|
|
|
(defun lookup-cdb (key stream &optional return-position-p)
|
|
(if (streamp stream)
|
|
(let* ((hash (to-cdb-hash key))
|
|
(table (logand #xFF hash)))
|
|
(unless (file-position stream (* table 8))
|
|
(cdb-error stream))
|
|
(let* ((start (read-word stream))
|
|
(length (read-word stream))
|
|
(index (mod (ash hash -8) length)))
|
|
(loop for reset = t
|
|
for i from 0 below length
|
|
for rounded-i = (mod (+ index i) length)
|
|
for position = (+ start (* 8 rounded-i))
|
|
do (progn
|
|
(when reset
|
|
(unless (file-position stream position)
|
|
(cdb-error stream))
|
|
(setf reset nil))
|
|
(let* ((other-hash (read-word stream))
|
|
(record-position (read-word stream)))
|
|
(when (zerop record-position)
|
|
(return nil))
|
|
(when (= other-hash hash)
|
|
(let ((output (values-coincide record-position key stream
|
|
return-position-p)))
|
|
(if output
|
|
(return output)
|
|
(setf reset t)))))))))
|
|
(with-open-file (s stream :direction :input
|
|
:element-type '(unsigned-byte 8))
|
|
(lookup-cdb key s return-position-p))))
|
|
|
|
(defun map-cdb (function stream)
|
|
(if (streamp stream)
|
|
(let* ((index (make-array (* 256 2) :element-type '(unsigned-byte 32))))
|
|
(unless (file-position stream 0)
|
|
(cdb-error stream))
|
|
(unless (= (read-sequence index stream) (length index))
|
|
(cdb-error stream))
|
|
(loop for i from 0 by 2 below (length index)
|
|
for table-position = (aref index i)
|
|
for table-length = (aref index (1+ i))
|
|
do (progn
|
|
(unless (file-position stream table-position)
|
|
(cdb-error stream))
|
|
(loop for i from 0 below table-length
|
|
for position from table-position by 8
|
|
for record-hash = (read-word stream)
|
|
for record-position = (read-word stream)
|
|
unless (zerop record-position)
|
|
do (progn
|
|
(unless (file-position stream record-position)
|
|
(cdb-error stream))
|
|
(let* ((key-length (read-word stream))
|
|
(value-length (read-word stream))
|
|
(key (make-array key-length
|
|
:element-type '(unsigned-byte 8)))
|
|
(value (make-array value-length
|
|
:element-type '(unsigned-byte 8))))
|
|
(unless (and (= (read-sequence key stream)
|
|
key-length)
|
|
(= (read-sequence value stream)
|
|
value-length))
|
|
(cdb-error stream))
|
|
(funcall function key value)))))))
|
|
(with-open-file (s stream :direction :input :element-type '(unsigned-byte 8))
|
|
(map-cdb function s))))
|
|
|
|
(provide :ecl-cdb)
|