1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2026-03-03 04:21:28 -08:00

lisp/cedet/semantic/wisent.el: New file.

This commit is contained in:
Chong Yidong 2009-09-07 16:38:54 +00:00
parent bb05142310
commit ca8a4d5e32

View file

@ -0,0 +1,346 @@
;;; semantic/wisent.el --- Wisent - Semantic gateway
;;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
;;; Free Software Foundation, Inc.
;; Author: David Ponce <david@dponce.com>
;; Maintainer: David Ponce <david@dponce.com>
;; Created: 30 Aug 2001
;; Keywords: syntax
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;;
;; Here are functions necessary to use the Wisent LALR parser from
;; Semantic environment.
;;; History:
;;
;;; Code:
(require 'semantic)
(require 'semantic/wisent/wisent)
;;; Lexical analysis
;;
(defvar wisent-lex-istream nil
"Input stream of `semantic-lex' syntactic tokens.")
(defvar wisent-lex-lookahead nil
"Extra lookahead token.
When non-nil it is directly returned by `wisent-lex-function'.")
;; Maintain this alias for compatibility until all WY grammars have
;; been translated again to Elisp code.
(semantic-alias-obsolete 'wisent-lex-make-token-table
'semantic-lex-make-type-table)
(defmacro wisent-lex-eoi ()
"Return an End-Of-Input lexical token.
The EOI token is like this: ($EOI "" POINT-MAX . POINT-MAX)."
`(cons ',wisent-eoi-term
(cons ""
(cons (point-max) (point-max)))))
(defmacro define-wisent-lexer (name doc &rest body)
"Create a new lexical analyzer with NAME.
DOC is a documentation string describing this analyzer.
When a token is available in `wisent-lex-istream', eval BODY forms
sequentially. BODY must return a lexical token for the LALR parser.
Each token in input was produced by `semantic-lex', it is a list:
(TOKSYM START . END)
TOKSYM is a terminal symbol used in the grammar.
START and END mark boundary in the current buffer of that token's
value.
Returned tokens must have the form:
(TOKSYM VALUE START . END)
where VALUE is the buffer substring between START and END positions."
`(defun
,name () ,doc
(cond
(wisent-lex-lookahead
(prog1 wisent-lex-lookahead
(setq wisent-lex-lookahead nil)))
(wisent-lex-istream
,@body)
((wisent-lex-eoi)))))
(define-wisent-lexer wisent-lex
"Return the next available lexical token in Wisent's form.
The variable `wisent-lex-istream' contains the list of lexical tokens
produced by `semantic-lex'. Pop the next token available and convert
it to a form suitable for the Wisent's parser."
(let* ((tk (car wisent-lex-istream)))
;; Eat input stream
(setq wisent-lex-istream (cdr wisent-lex-istream))
(cons (semantic-lex-token-class tk)
(cons (semantic-lex-token-text tk)
(semantic-lex-token-bounds tk)))))
;;; Syntax analysis
;;
(defvar wisent-error-function nil
"Function used to report parse error.
By default use the function `wisent-message'.")
(make-variable-buffer-local 'wisent-error-function)
(defvar wisent-lexer-function 'wisent-lex
"Function used to obtain the next lexical token in input.
Should be a lexical analyzer created with `define-wisent-lexer'.")
(make-variable-buffer-local 'wisent-lexer-function)
;; Tag production
;;
(defsubst wisent-raw-tag (semantic-tag)
"Return raw form of given Semantic tag SEMANTIC-TAG.
Should be used in semantic actions, in grammars, to build a Semantic
parse tree."
(nconc semantic-tag
(if (or $region
(setq $region (nthcdr 2 wisent-input)))
(list (car $region) (cdr $region))
(list (point-max) (point-max)))))
(defsubst wisent-cook-tag (raw-tag)
"From raw form of Semantic tag RAW-TAG, return a list of cooked tags.
Should be used in semantic actions, in grammars, to build a Semantic
parse tree."
(let* ((cooked (semantic--tag-expand raw-tag))
(l cooked))
(while l
(semantic--tag-put-property (car l) 'reparse-symbol $nterm)
(setq l (cdr l)))
cooked))
;; Unmatched syntax collector
;;
(defun wisent-collect-unmatched-syntax (nomatch)
"Add lexical token NOMATCH to the cache of unmatched tokens.
See also the variable `semantic-unmatched-syntax-cache'.
NOMATCH is in Wisent's form: (SYMBOL VALUE START . END)
and will be collected in `semantic-lex' form: (SYMBOL START . END)."
(let ((region (cddr nomatch)))
(and (number-or-marker-p (car region))
(number-or-marker-p (cdr region))
(setq semantic-unmatched-syntax-cache
(cons (cons (car nomatch) region)
semantic-unmatched-syntax-cache)))))
;; Parser plug-ins
;;
;; The following functions permit to plug the Wisent LALR parser in
;; Semantic toolkit. They use the standard API provided by Semantic
;; to plug parsers in.
;;
;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME:
;;
;; - `wisent-parse-stream' designed to override the standard function
;; `semantic-parse-stream'.
;;
;; - `wisent-parse-region' designed to override the standard function
;; `semantic-parse-region'.
;;
;; Maybe the latter is faster because it eliminates a lot of function
;; call.
;;
(defun wisent-parse-stream (stream goal)
"Parse STREAM using the Wisent LALR parser.
GOAL is a nonterminal symbol to start parsing at.
Return the list (STREAM SEMANTIC-STREAM) where STREAM are those
elements of STREAM that have not been used. SEMANTIC-STREAM is the
list of semantic tags found.
The LALR parser automaton must be available in buffer local variable
`semantic--parse-table'.
Must be installed by `semantic-install-function-overrides' to override
the standard function `semantic-parse-stream'."
(let (wisent-lex-istream wisent-lex-lookahead la-elt cache)
;; IMPLEMENTATION NOTES:
;; `wisent-parse' returns a lookahead token when it stopped
;; parsing before encountering the end of input. To re-enter the
;; parser it is necessary to push back in the lexical input stream
;; the last lookahead token issued. Because the format of
;; lookahead tokens and tokens in STREAM can be different the
;; lookahead token is put in the variable `wisent-lex-lookahead'
;; before calling `wisent-parse'. Wisent's lexers always pop the
;; next lexical token from that variable when non nil, then from
;; the lexical input stream.
;;
;; The first element of STREAM is used to keep lookahead tokens
;; across successive calls to `wisent-parse-stream'. In fact
;; what is kept is a stack of lookaheads encountered so far. It
;; is cleared when `wisent-parse' returns a valid semantic tag,
;; or twice the same lookahead token! The latter indicates that
;; there is a syntax error on that token. If so, tokens currently
;; in the lookahead stack have not been used, and are moved into
;; `semantic-unmatched-syntax-cache'. When the parser will be
;; re-entered, a new lexical token will be read from STREAM.
;;
;; The first element of STREAM that contains the lookahead stack
;; has this format (compatible with the format of `semantic-lex'
;; tokens):
;;
;; (LOOKAHEAD-STACK START . END)
;;
;; where LOOKAHEAD-STACK is a list of lookahead tokens. And
;; START/END are the bounds of the lookahead at top of stack.
;; Retrieve lookahead token from stack
(setq la-elt (car stream))
(if (consp (car la-elt))
;; The first elt of STREAM contains a lookahead stack
(setq wisent-lex-lookahead (caar la-elt)
stream (cdr stream))
(setq la-elt nil))
;; Parse
(setq wisent-lex-istream stream
cache (semantic-safe "wisent-parse-stream: %s"
(condition-case error-to-filter
(wisent-parse semantic--parse-table
wisent-lexer-function
wisent-error-function
goal)
(args-out-of-range
(if (and (not debug-on-error)
(= wisent-parse-max-stack-size
(nth 2 error-to-filter)))
(progn
(message "wisent-parse-stream: %s"
(error-message-string error-to-filter))
(message "wisent-parse-max-stack-size \
might need to be increased"))
(apply 'signal error-to-filter))))))
;; Manage returned lookahead token
(if wisent-lookahead
(if (eq (caar la-elt) wisent-lookahead)
;; It is already at top of lookahead stack
(progn
(setq cache nil
la-elt (car la-elt))
(while la-elt
;; Collect unmatched tokens from the stack
(run-hook-with-args
'wisent-discarding-token-functions (car la-elt))
(setq la-elt (cdr la-elt))))
;; New lookahead token
(if (or (consp cache) ;; Clear the stack if parse succeeded
(null la-elt))
(setq la-elt (cons nil nil)))
;; Push it into the stack
(setcar la-elt (cons wisent-lookahead (car la-elt)))
;; Update START/END
(setcdr la-elt (cddr wisent-lookahead))
;; Push (LOOKAHEAD-STACK START . END) in STREAM
(setq wisent-lex-istream (cons la-elt wisent-lex-istream))))
;; Return (STREAM SEMANTIC-STREAM)
(list wisent-lex-istream
(if (consp cache) cache '(nil))
)))
(defun wisent-parse-region (start end &optional goal depth returnonerror)
"Parse the area between START and END using the Wisent LALR parser.
Return the list of semantic tags found.
Optional arguments GOAL is a nonterminal symbol to start parsing at,
DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to
stop parsing on syntax error, when non-nil.
The LALR parser automaton must be available in buffer local variable
`semantic--parse-table'.
Must be installed by `semantic-install-function-overrides' to override
the standard function `semantic-parse-region'."
(if (or (< start (point-min)) (> end (point-max)) (< end start))
(error "Invalid bounds [%s %s] passed to `wisent-parse-region'"
start end))
(let* ((case-fold-search semantic-case-fold)
(wisent-lex-istream (semantic-lex start end depth))
ptree tag cooked lstack wisent-lex-lookahead)
;; Loop while there are lexical tokens available
(while wisent-lex-istream
;; Parse
(setq wisent-lex-lookahead (car lstack)
tag (semantic-safe "wisent-parse-region: %s"
(wisent-parse semantic--parse-table
wisent-lexer-function
wisent-error-function
goal)))
;; Manage returned lookahead token
(if wisent-lookahead
(if (eq (car lstack) wisent-lookahead)
;; It is already at top of lookahead stack
(progn
(setq tag nil)
(while lstack
;; Collect unmatched tokens from lookahead stack
(run-hook-with-args
'wisent-discarding-token-functions (car lstack))
(setq lstack (cdr lstack))))
;; Push new lookahead token into the stack
(setq lstack (cons wisent-lookahead lstack))))
;; Manage the parser result
(cond
;; Parse succeeded, cook result
((consp tag)
(setq lstack nil ;; Clear the lookahead stack
cooked (semantic--tag-expand tag)
ptree (append cooked ptree))
(while cooked
(setq tag (car cooked)
cooked (cdr cooked))
(or (semantic--tag-get-property tag 'reparse-symbol)
(semantic--tag-put-property tag 'reparse-symbol goal)))
)
;; Return on error if requested
(returnonerror
(setq wisent-lex-istream nil)
))
;; Work in progress...
(if wisent-lex-istream
(and (eq semantic-working-type 'percent)
(boundp 'semantic--progress-reporter)
semantic--progress-reporter
(progress-reporter-update
semantic--progress-reporter
(/ (* 100 (semantic-lex-token-start
(car wisent-lex-istream)))
(point-max))))))
;; Return parse tree
(nreverse ptree)))
;;; Interfacing with edebug
;;
(add-hook
'edebug-setup-hook
#'(lambda ()
(def-edebug-spec define-wisent-lexer
(&define name stringp def-body)
)
))
(provide 'semantic/wisent)
;;; semantic/wisent.el ends here