diff --git a/contrib/ecl-cdb/ecl-cdb.lisp b/contrib/ecl-cdb/ecl-cdb.lisp new file mode 100644 index 000000000..b4419f9d4 --- /dev/null +++ b/contrib/ecl-cdb/ecl-cdb.lisp @@ -0,0 +1,265 @@ +;;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: SYSTEM -*- +;;;; +;;;; Copyright (c) 2011, Juan Jose Garcia-Ripoll. +;;;; +;;;; This program is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU Library General Public +;;;; License as published by the Free Software Foundation; either +;;;; version 2 of the License, or (at your option) any later version. +;;;; +;;;; See file '../Copyright' for full details. +;;;; +;;;; This file implements the CDB specification, which you find here +;;;; http://cr.yp.to/cdb/cdb.txt and is reproduced below, replicating +;;;; the interface developed by Zach Beane under the name ZCDB. +;;;; +;;;; A structure for constant databases +;;;; 19960914 +;;;; Copyright 1996 +;;;; D. J. Bernstein, djb@pobox.com +;;;; +;;;; A cdb is an associative array: it maps strings (``keys'') to strings +;;;; (``data''). +;;;; +;;;; A cdb contains 256 pointers to linearly probed open hash tables. The +;;;; hash tables contain pointers to (key,data) pairs. A cdb is stored in +;;;; a single file on disk: +;;;; +;;;; +----------------+---------+-------+-------+-----+---------+ +;;;; | p0 p1 ... p255 | records | hash0 | hash1 | ... | hash255 | +;;;; +----------------+---------+-------+-------+-----+---------+ +;;;; +;;;; Each of the 256 initial pointers states a position and a length. The +;;;; position is the starting byte position of the hash table. The length +;;;; is the number of slots in the hash table. +;;;; +;;;; Records are stored sequentially, without special alignment. A record +;;;; states a key length, a data length, the key, and the data. +;;;; +;;;; Each hash table slot states a hash value and a byte position. If the +;;;; byte position is 0, the slot is empty. Otherwise, the slot points to +;;;; a record whose key has that hash value. +;;;; +;;;; Positions, lengths, and hash values are 32-bit quantities, stored in +;;;; little-endian form in 4 bytes. Thus a cdb must fit into 4 gigabytes. +;;;; +;;;; A record is located as follows. Compute the hash value of the key in +;;;; the record. The hash value modulo 256 is the number of a hash table. +;;;; The hash value divided by 256, modulo the length of that table, is a +;;;; slot number. Probe that slot, the next higher slot, and so on, until +;;;; you find the record or run into an empty slot. +;;;; +;;;; The cdb hash function is ``h = ((h << 5) + h) ^ c'', with a starting +;;;; hash of 5381. +;;;; + +(defpackage "ECL-CDB" + (:use "CL") + (:export "WITH-OUTPUT-TO-CDB" "ADD-RECORD" "LOOKUP-CDB")) + +(in-package "ECL-CDB") + +(defstruct cdb + stream + pathname + word-buffer + tables + temporary-pathname) + +(defun read-word (stream) + (logior (read-byte stream) + (ash (read-byte stream) 8) + (ash (read-byte stream) 16) + (ash (read-byte stream) 24))) + +(defun write-word (byte stream) + (declare (type (unsigned-byte 32) byte) + (stream stream) + (optimize speed)) + (write-byte (logand #xff byte) stream) + (write-byte (logand #xff (ash byte -8)) stream) + (write-byte (logand #xff (ash byte -16)) stream) + (write-byte (logand #xff (ash byte -24)) stream)) + +(defun write-vector (vector stream) + (declare (type (array (unsigned-byte 8) (*)) vector)) + (loop for v across vector + do (write-word v stream)) + (length vector)) + +(defun to-cdb-hash (key-vector) + (declare (type (array (unsigned-byte 8) (*)) vector)) + (loop with h of-type (unsigned-integer 32) = 5381 + for byte of-type (unsigned-byte 8) across key-vector + do (setf h (logxor (logand #xffffffff + (+ (ash (logand #.(ash #xffffffff -5) h) + 5) + h)) + byte)) + finally (return h))) + +(defun %make-cdb (cdb-pathname temporary-pathname) + (let ((stream (open temporary-pathname + :direction :output + :if-exists :supersede + :if-does-not-exist :create + :element-type '(unsigned-byte 8)))) + (if stream + (progn + (file-position stream 0) + (dotimes (i (* 256 2)) + (write-word 0 stream)) + (make-cdb :stream stream + :pathname cdb-pathname + :tables (make-array 256 :initial-element nil) + :temporary-pathname temporary-pathname)) + (error "Unable to create CDB at filename ~A" temporary-pathname)))) + +(defmacro with-output-to-cdb ((cdb cdb-pathname temporary-pathname) &body body) + `(let (,cdb) + (unwind-protect + (progn + (setf ,cdb (%make-cdb ,cdb-pathname ,temporary-pathname)) + ,@body) + (close-cdb ,cdb)))) + +(defun add-record (key value cdb) + ;; This routine dumps the data of this record, storing a small + ;; reference in the CDB structure itself. This reference will be + ;; used to create the hash. + (let* ((hash-key (to-cdb-hash key)) + (table-index (logand #xff hash-key)) + (stream (cdb-stream cdb))) + (push (cons hash-key (file-position stream)) + (aref (cdb-tables cdb) table-index)) + (write-word (length key) stream) + (write-word (length value) stream) + (write-sequence key stream) + (write-sequence value stream))) + +(defun dump-table (table stream) + (declare (optimize speed)) + ;; TABLE is an association list of (HASH-KEY . FILE-POSITION) + ;; We dump it at the end of the file. The length of the actual + ;; file table can be a bit larger, to avoid many coincidences. + ;; Here we use a factor 2. + (loop with length = (* 2 (length table)) + with vector = (make-array (* 2 length) :initial-element 0) + for (hash-key . pos) in table + for index = (mod (ash hash-key -8) length) + do (loop for disp from 0 below length + for i = (* 2 (mod (+ disp index) length)) + for record-pos = (aref vector (1+ i)) + until (zerop record-pos) + finally (setf (aref vector i) hash-key (aref vector (1+ i)) pos)) + finally (return (write-vector vector stream)))) + +(defun dump-cdb (cdb) + ;; After we have dumped all the records in the file, we append the + ;; hash tables and recreate the index table at the beginning. + (let* ((stream (cdb-stream cdb)) + (index (make-array (* 2 256) :element-type '(unsigned-byte 32)))) + (loop for table across (cdb-tables cdb) + for i of-type fixnum from 0 by 2 + do (setf (aref index i) (file-position stream) + (aref index (1+ i)) (dump-table table stream))) + (file-position stream 0) + (write-vector index stream))) + +(defun close-cdb (cdb) + (let ((stream (cdb-stream cdb))) + (when (open-stream-p stream) + (dump-cdb cdb) + (close stream) + (when (cdb-pathname cdb) + (rename-file (cdb-temporary-pathname cdb) + (cdb-pathname cdb)))))) + +(defun cdb-error (stream) + (error "Error when reading CDB database ~A" stream)) + +(defun values-coincide (position key-vector stream return-position-p) + (unless (file-position stream position) + (cdb-error stream)) + (let ((key-length (read-word stream))) + (when (= key-length (length key-vector)) + (let* ((value-length (read-word stream)) + (other-key (make-array key-length :element-type '(unsigned-byte 8)))) + (read-sequence other-key stream) + (when (equalp other-key key-vector) + (if return-position-p + (file-position stream) + (let ((value (make-array value-length :element-type '(unsigned-byte 8)))) + (read-sequence value stream) + value) + )))))) + +(defun lookup-cdb (key stream &optional return-position-p) + (if (streamp stream) + (let* ((hash (to-cdb-hash key)) + (table (logand #xFF hash))) + (unless (file-position stream (* table 8)) + (cdb-error stream)) + (let* ((start (read-word stream)) + (length (read-word stream)) + (index (mod (ash hash -8) length))) + (loop for reset = t + for i from 0 below length + for rounded-i = (mod (+ index i) length) + for position = (+ start (* 8 rounded-i)) + do (progn + (when reset + (unless (file-position stream position) + (cdb-error stream)) + (setf reset nil)) + (let* ((other-hash (read-word stream)) + (record-position (read-word stream))) + (when (zerop record-position) + (return nil)) + (when (= other-hash hash) + (let ((output (values-coincide record-position key stream + return-position-p))) + (if output + (return output) + (setf reset t))))))))) + (with-open-file (s stream :direction :input + :element-type '(unsigned-byte 8)) + (lookup-cdb key s return-position-p)))) + +(defun map-cdb (function stream) + (if (streamp stream) + (let* ((index (make-array (* 256 2) :element-type '(unsigned-byte 32)))) + (unless (file-position stream 0) + (cdb-error stream)) + (unless (= (read-sequence index stream) (length index)) + (cdb-error stream)) + (loop for i from 0 by 2 below (length index) + for table-position = (aref index i) + for table-length = (aref index (1+ i)) + do (progn + (unless (file-position stream table-position) + (cdb-error stream)) + (loop for i from 0 below table-length + for position from table-position by 8 + for record-hash = (read-word stream) + for record-position = (read-word stream) + unless (zerop record-position) + do (progn + (unless (file-position stream record-position) + (cdb-error stream)) + (let* ((key-length (read-word stream)) + (value-length (read-word stream)) + (key (make-array key-length + :element-type '(unsigned-byte 8))) + (value (make-array value-length + :element-type '(unsigned-byte 8)))) + (unless (and (= (read-sequence key stream) + key-length) + (= (read-sequence value stream) + value-length)) + (cdb-error stream)) + (funcall function key value))))))) + (with-open-file (s stream :direction :input :element-type '(unsigned-byte 8)) + (map-cdb function s)))) + +(provide :ecl-cdb) diff --git a/contrib/ecl-cdb/ecl-help.lisp b/contrib/ecl-cdb/ecl-help.lisp new file mode 100644 index 000000000..0800f1edd --- /dev/null +++ b/contrib/ecl-cdb/ecl-help.lisp @@ -0,0 +1,54 @@ +;;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: SYSTEM -*- +;;;; +;;;; Copyright (c) 2011, Juan Jose Garcia-Ripoll. +;;;; +;;;; This program is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU Library General Public +;;;; License as published by the Free Software Foundation; either +;;;; version 2 of the License, or (at your option) any later version. +;;;; +;;;; See file '../Copyright' for full details. +;;;; + +(in-package "SYSTEM") + +(defun to-cdb-vector (object) + (let* ((vector (make-array 128 :adjustable t + :fill-pointer 0 + :element-type '(unsigned-byte 8) + :initial-element 0)) + (stream (ext:make-sequence-output-stream + vector :external-format :utf-8))) + (with-standard-io-syntax + (let ((si::*print-package* (find-package "CL"))) + (write object :stream stream :pretty nil + :readably nil :escape t))) + vector)) + +(defun from-cdb-vector (vector) + (let* ((stream (ext:make-sequence-input-stream + vector :external-format :utf-8))) + (read stream nil nil nil))) + +(defun search-help-file (string path) + (let* ((key (to-cdb-vector string)) + (value (ecl-cdb:lookup-cdb key path))) + (when value + (from-cdb-vector value)))) + +(defun dump-help-file (hash-table path &optional merge test) + (when merge + (error "DUMP-HELP-FILE does not suport yet the third argument")) + (ecl-cdb:with-output-to-cdb (cdb nil path) + (loop for k being the hash-key of hash-table + using (hash-value v) + do (ecl-cdb:add-record (to-cdb-vector k) + (to-cdb-vector v) + cdb))) + ;; Testing the consistency of the output + (when test + (loop for k being the hash-key of hash-table + using (hash-value v) + for other-value = (search-help-file k path) + unless (and other-value (equalp other-value v)) + do (error "Symbol ~A not found in database ~A" k path)))) diff --git a/src/compile.lsp.in b/src/compile.lsp.in index 3427a415a..b40b4648b 100755 --- a/src/compile.lsp.in +++ b/src/compile.lsp.in @@ -28,6 +28,8 @@ ;;; #+stage1 (progn + (load "ext:ecl-cdb;ecl-cdb") + (load "ext:ecl-cdb;ecl-help") (load "@true_srcdir@/doc/help.lsp") (si::dump-documentation "@true_builddir@/help.doc")) @@ -159,6 +161,15 @@ :dir "build:@ECL_CMPDIR@;" :prefix "CMP" :builtin #+:BUILTIN-CMP t #-:BUILTIN-CMP nil) +(build-module "ecl-cdb" '("ext:ecl-cdb;ecl-cdb.lisp") + :dir "build:ext;" :prefix "EXT" + :builtin #+:WANTS-DLOPEN nil #-:WANTS-DLOPEN t) + +(build-module "ecl-help" '("ext:ecl-cdb;ecl-cdb.lisp" + "ext:ecl-cdb;ecl-help.lisp") + :dir "build:ext;" :prefix "EXT" + :builtin #+:WANTS-DLOPEN nil #-:WANTS-DLOPEN t) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; EXTENSIONS diff --git a/src/lsp/autoload.lsp b/src/lsp/autoload.lsp index bdfd68c02..0b48a28a4 100644 --- a/src/lsp/autoload.lsp +++ b/src/lsp/autoload.lsp @@ -146,6 +146,8 @@ in Windows) to learn this.") ;;; Help. +(autoload "sys:ecl-help" 'dump-help-file 'search-help-file) + (defun help (&optional (symbol 'help)) "Args: (&optional symbol) ECL specific. diff --git a/src/lsp/helpfile.lsp b/src/lsp/helpfile.lsp index e9299463e..6f8a3a2b6 100644 --- a/src/lsp/helpfile.lsp +++ b/src/lsp/helpfile.lsp @@ -12,85 +12,6 @@ (in-package "SYSTEM") -;;;;---------------------------------------------------------------------- -;;;; Help files -;;;; - -(defun read-help-file (path) - (let* ((*package* (find-package "CL")) - (file (open path :direction :input))) - (do ((end nil) - (h (make-hash-table :size 1024 :test #'equal))) - (end h) - (do ((c (read-char file nil))) - ((or (not c) (eq c #\^_)) - (when (not c) (setq end t))) - ) - (when (not end) - (let* ((key (read file)) - (value (read file))) - (si::hash-set key h value)))))) - -(defun dump-help-file (hash-table path &optional (merge nil)) - (let ((entries nil)) - (when merge - (let ((old-hash (read-help-file path))) - (push old-hash *documentation-pool*) - (maphash #'(lambda (key doc) - (when doc - (do* ((list doc) - (doc-type (first list)) - (string (second list))) - (list) - (set-documentation key doc-type string)))) - hash-table) - (setq hash-table (pop *documentation-pool*)))) - (maphash #'(lambda (key doc) - (when (and (symbolp key) doc) - (push (cons key doc) entries))) - hash-table) - (setq entries (sort entries #'string-lessp :key #'car)) - (let* ((*package* (find-package "CL")) - (file (open path :direction :output))) - (progv (car +ecl-syntax-progv-list+) - (cdr +ecl-syntax-progv-list+) - (dolist (l entries) - (format file "~A~S~%~S~%" #\^_ (car l) (rest l)))) - (close file) - path))) - -(defun search-help-file (key path &aux (pos 0)) - (declare (ext:type-assertions nil)) - (when (not (or (stringp key) (symbolp key))) - (return-from search-help-file nil)) - (labels ((bin-search (file start end &aux (delta 0) (middle 0) sym) - (declare (fixnum start end delta middle)) - (when (< start end) - (setq middle (round (+ start end) 2)) - (file-position file middle) - (if (and (plusp (setq delta (scan-for #\^_ file))) - (<= delta (- end middle))) - (if (equal key (setq sym (read file))) - t - (if (string< key sym) - (bin-search file start (1- middle)) - (bin-search file (+ middle delta) end))) - (bin-search file start (1- middle))))) - (scan-for (char file) - (do ((v #\space (read-char file nil nil)) - (n 0 (1+ n))) - ((or (eql v char) (not v)) (if v n -1)) - (declare (fixnum n))))) - (when (not (probe-file path)) - (return-from search-help-file nil)) - (let* ((*package* (find-package "CL")) - (file (open path :direction :input)) - output) - (when (bin-search file 0 (file-length file)) - (setq output (read file))) - (close file) - output))) - ;;;;---------------------------------------------------------------------- ;;;; Documentation system ;;;;