1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2026-01-04 11:00:45 -08:00

Add support for Mercury (https://mercurylang.org) in 'etags'

Tag declarations starting lines with ':-'.
By default, all declarations are tagged.  Optionally, first
predicate or functions in clauses can be tagged as in Prolog
support using '--declarations'.  (Bug#47408).
* lib-src/etags.c (test_objc_is_mercury, Mercury_functions)
(mercury_skip_comment,  mercury_decl, mercury_pr):
Implement Mercury support.  As Mercury and Objective-C have
the same file extension .m, a heuristic test tries to detect
the language.

* doc/man/etags.1: Document the change.  Add Mercury-specific
 behavior for '--declarations'.  This option tags first
predicates or functions in clauses in addition to declarations.
This commit is contained in:
Fabrice Nicol 2021-06-01 04:15:59 +02:00 committed by Eli Zaretskii
parent 2207f9adcc
commit 5a8a5a990a
5 changed files with 2464 additions and 13 deletions

View file

@ -1,5 +1,5 @@
.\" See section COPYING for copyright and redistribution information.
.TH ETAGS 1 "2019-06-24" "GNU Tools" "GNU"
.TH ETAGS 1 "2021-03-30" "GNU Tools" "GNU"
.de BP
.sp
.ti -.2i
@ -50,9 +50,9 @@ format understood by
.BR vi ( 1 )\c
\&. Both forms of the program understand
the syntax of C, Objective C, C++, Java, Fortran, Ada, Cobol, Erlang,
Forth, Go, HTML, LaTeX, Emacs Lisp/Common Lisp, Lua, Makefile, Pascal, Perl,
Ruby, Rust, PHP, PostScript, Python, Prolog, Scheme and
most assembler\-like syntaxes.
Forth, Go, HTML, LaTeX, Emacs Lisp/Common Lisp, Lua, Makefile, Mercury, Pascal,
Perl, Ruby, Rust, PHP, PostScript, Python, Prolog, Scheme and most
assembler\-like syntaxes.
Both forms read the files specified on the command line, and write a tag
table (defaults: \fBTAGS\fP for \fBetags\fP, \fBtags\fP for
\fBctags\fP) in the current working directory.
@ -91,6 +91,9 @@ Only \fBctags\fP accepts this option.
In C and derived languages, create tags for function declarations,
and create tags for extern variables unless \-\-no\-globals is used.
In Lisp, create tags for (defvar foo) declarations.
In Mercury, declarations start a line with "\|\fB:-\fP\|" and are always
tagged. In addition, this option tags predicates or functions in first
rules of clauses, as in Prolog.
.TP
.B \-D, \-\-no\-defines
Do not create tag entries for C preprocessor constant definitions
@ -125,10 +128,14 @@ final brace of a function or structure definition in C and C++.
Parse the following files according to the given language. More than
one such options may be intermixed with filenames. Use \fB\-\-help\fP
to get a list of the available languages and their default filename
extensions. The "auto" language can be used to restore automatic
detection of language based on the file name. The "none"
language may be used to disable language parsing altogether; only
regexp matching is done in this case (see the \fB\-\-regex\fP option).
extensions. For example, as Mercury and Objective-C have same
filename extension \fI.m\fP, a test based on contents tries to detect
the language. If this test fails, \fB\-\-language=\fP\fImercury\fP or
\fB\-\-language=\fP\fIobjc\fP should be used.
The "auto" language can be used to restore automatic detection of language
based on the file name. The "none" language may be used to disable language
parsing altogether; only regexp matching is done in this case (see the
\fB\-\-regex\fP option).
.TP
.B \-\-members
Create tag entries for variables that are members of structure-like

View file

@ -111,6 +111,13 @@ filters.
* Changes in Emacs 28.1
+++
** Etags support for the Mercury programming language (https://mercurylang.org).
** Etags command line option --declarations now has Mercury-specific behavior.
All Mercury declarations are tagged by default.
For compatibility with Prolog etags support, predicates and functions appearing
first in clauses will also be tagged if etags is run with '--declarations'.
+++
** New command 'font-lock-update', bound to 'C-x x f'.
This command updates the syntax highlighting in this buffer.

View file

@ -142,7 +142,14 @@ University of California, as described above. */
# define CTAGS false
#endif
/* Copy to DEST from SRC (containing LEN bytes), and append a NUL byte. */
/* Define MERCURY_HEURISTICS_RATIO as it was necessary to disambiguate
Mercury from Objective C, which have same file extensions .m
See comments before function test_objc_is_mercury for details. */
#ifndef MERCURY_HEURISTICS_RATIO
# define MERCURY_HEURISTICS_RATIO 0.5
#endif
/* COPY to DEST from SRC (containing LEN bytes), and append a NUL byte. */
static void
memcpyz (void *dest, void const *src, ptrdiff_t len)
{
@ -359,6 +366,7 @@ static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Mercury_functions (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
@ -379,6 +387,7 @@ static ptrdiff_t readline_internal (linebuffer *, FILE *, char const *);
static bool nocase_tail (const char *);
static void get_tag (char *, char **);
static void get_lispy_tag (char *);
static void test_objc_is_mercury (char *, language **);
static void analyze_regex (char *);
static void free_regexps (void);
@ -684,9 +693,21 @@ static const char Makefile_help [] =
"In makefiles, targets are tags; additionally, variables are tags\n\
unless you specify '--no-globals'.";
/* Mercury and Objective C share the same .m file extensions. */
static const char *Mercury_suffixes [] =
{"m",
NULL};
static const char Mercury_help [] =
"In Mercury code, tags are all declarations beginning a line with ':-'\n\
and optionally Prolog-like definitions (first rule for a predicate or \
function).\n\
To enable this behavior, run etags using --declarations.";
static bool with_mercury_definitions = false;
float mercury_heuristics_ratio = MERCURY_HEURISTICS_RATIO;
static const char *Objc_suffixes [] =
{ "lm", /* Objective lex file */
"m", /* Objective C file */
"m", /* By default, Objective C file will be assumed. */
NULL};
static const char Objc_help [] =
"In Objective C code, tags include Objective C definitions for classes,\n\
@ -831,7 +852,9 @@ static language lang_names [] =
{ "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
{ "lua", Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters},
{ "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
/* objc listed before mercury as it is a better default for .m extensions. */
{ "objc", Objc_help, plain_C_entries, Objc_suffixes },
{ "mercury", Mercury_help, Mercury_functions, Mercury_suffixes },
{ "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
{ "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
{ "php", PHP_help, PHP_functions, PHP_suffixes },
@ -958,6 +981,9 @@ Relative ones are stored relative to the output file's directory.\n");
puts
("\tand create tags for extern variables unless --no-globals is used.");
puts ("In Mercury, tag both declarations starting a line with ':-' and first\n\
predicates or functions in clauses.");
if (CTAGS)
puts ("-d, --defines\n\
Create tag entries for C #define constants and enum constants, too.");
@ -1783,6 +1809,11 @@ find_entries (FILE *inf)
if (parser == NULL)
{
lang = get_language_from_filename (curfdp->infname, true);
/* Disambiguate file names between Objc and Mercury */
if (lang != NULL && strcmp (lang->name, "objc") == 0)
test_objc_is_mercury (curfdp->infname, &lang);
if (lang != NULL && lang->function != NULL)
{
curfdp->lang = lang;
@ -6070,6 +6101,457 @@ prolog_atom (char *s, size_t pos)
return 0;
}
/*
* Support for Mercury
*
* Assumes that the declarationa starts at column 0.
* Original code by Sunichirou Sugou (1989) for Prolog.
* Rewritten by Anders Lindgren (1996) for Prolog.
* Adapted by Fabrice Nicol (2021) for Mercury.
* Note: Prolog-support behavior is preserved if
* --declarations is used, corresponding to
* with_mercury_definitions=true.
*/
static ptrdiff_t mercury_pr (char *, char *, ptrdiff_t);
static void mercury_skip_comment (linebuffer *, FILE *);
static bool is_mercury_type = false;
static bool is_mercury_quantifier = false;
static bool is_mercury_declaration = false;
/*
* Objective-C and Mercury have identical file extension .m
* To disambiguate between Objective C and Mercury, parse file
* with the following heuristics hook:
* - if line starts with :- choose Mercury unconditionally,
* - if line starts with #, @, choose Objective-C,
* - otherwise compute the following ratio:
*
* r = (number of lines with :-
* or % in non-commented parts or . at trimmed EOL)
* / (number of lines - number of lines starting by any amount
* of whitespace, optionally followed by comment(s))
*
* Note: strings are neglected in counts.
*
* If r > mercury_heuristics_ratio, choose Mercury.
* Experimental tests show that a possibly optimal default value for
* this floor value is around 0.5. This is the default value for
* MERCURY_HEURISTICS_RATIO, defined in the first lines of this file.
* The closer r to 0.5, the closer the source code to pure Prolog.
* Idiomatic Mercury is scored either with r = 1.0 or higher.
* Objective-C is scored with r = 0.0. When this fails, the r-score never
* rose above 0.1 in Objective-C tests.
*/
static void
test_objc_is_mercury (char *this_file, language **lang)
{
if (this_file == NULL) return;
FILE* fp = fopen (this_file, "r");
if (fp == NULL)
pfatal (this_file);
bool blank_line = false; /* Line starting with any amount of white space
followed by optional comment(s). */
bool commented_line = false;
bool found_dot = false;
bool only_space_before = true;
bool start_of_line = true;
int c;
intmax_t lines = 1;
intmax_t mercury_dots = 0;
intmax_t percentage_signs = 0;
intmax_t rule_signs = 0;
float ratio = 0;
while ((c = fgetc (fp)) != EOF)
{
switch (c)
{
case '\n':
if (! blank_line) ++lines;
blank_line = true;
commented_line = false;
start_of_line = true;
if (found_dot) ++mercury_dots;
found_dot = false;
only_space_before = true;
break;
case '.':
found_dot = ! commented_line;
only_space_before = false;
break;
case '%': /* More frequent in Mercury. May be modulo in Obj.-C. */
if (! commented_line)
{
++percentage_signs;
/* Cannot tell if it is a comment or modulo yet for sure.
Yet works for heuristic purposes. */
commented_line = true;
}
found_dot = false;
start_of_line = false;
only_space_before = false;
break;
case '/':
{
int d = fgetc (fp);
found_dot = false;
only_space_before = false;
if (! commented_line)
{
if (d == '*')
commented_line = true;
else
/* If d == '/', cannot tell if it is an Obj.-C comment:
may be Mercury integ. division. */
blank_line = false;
}
}
FALLTHROUGH;
case ' ':
case '\t':
start_of_line = false;
break;
case ':':
c = fgetc (fp);
if (start_of_line)
{
if (c == '-')
{
ratio = 1.0; /* Failsafe, not an operator in Obj.-C. */
goto out;
}
start_of_line = false;
}
else
{
/* p :- q. Frequent in Mercury.
Rare or in quoted exprs in Obj.-C. */
if (c == '-' && ! commented_line)
++rule_signs;
}
blank_line = false;
found_dot = false;
only_space_before = false;
break;
case '@':
case '#':
if (start_of_line || only_space_before)
{
ratio = 0.0;
goto out;
}
FALLTHROUGH;
default:
start_of_line = false;
blank_line = false;
found_dot = false;
only_space_before = false;
}
}
/* Fallback heuristic test. Not failsafe but errless in pratice. */
ratio = ((float) rule_signs + percentage_signs + mercury_dots) / lines;
out:
if (fclose (fp) == EOF)
pfatal (this_file);
if (ratio > mercury_heuristics_ratio)
{
/* Change the language from Objective C to Mercury. */
static language lang0 = { "mercury", Mercury_help, Mercury_functions,
Mercury_suffixes };
*lang = &lang0;
}
}
static void
Mercury_functions (FILE *inf)
{
char *cp, *last = NULL;
ptrdiff_t lastlen = 0, allocated = 0;
if (declarations) with_mercury_definitions = true;
LOOP_ON_INPUT_LINES (inf, lb, cp)
{
if (cp[0] == '\0') /* Empty line. */
continue;
else if (c_isspace (cp[0]) || cp[0] == '%')
/* A Prolog-type comment or anything other than a declaration. */
continue;
else if (cp[0] == '/' && cp[1] == '*') /* Mercury C-type comment. */
mercury_skip_comment (&lb, inf);
else
{
is_mercury_declaration = (cp[0] == ':' && cp[1] == '-');
if (is_mercury_declaration
|| with_mercury_definitions)
{
ptrdiff_t len = mercury_pr (cp, last, lastlen);
if (0 < len)
{
/* Store the declaration to avoid generating duplicate
tags later. */
if (allocated <= len)
{
xrnew (last, len + 1, 1);
allocated = len + 1;
}
memcpyz (last, cp, len);
lastlen = len;
}
}
}
}
free (last);
}
static void
mercury_skip_comment (linebuffer *plb, FILE *inf)
{
char *cp;
do
{
for (cp = plb->buffer; *cp != '\0'; ++cp)
if (cp[0] == '*' && cp[1] == '/')
return;
readline (plb, inf);
}
while (perhaps_more_input (inf));
}
/*
* A declaration is added if it matches:
* <beginning of line>:-<whitespace><Mercury Term><whitespace>(
* If with_mercury_definitions == true, we also add:
* <beginning of line><Mercury item><whitespace>(
* or <beginning of line><Mercury item><whitespace>:-
* As for Prolog support, different arities and types are not taken into
* consideration.
* Item is added to the tags database if it doesn't match the
* name of the previous declaration.
*
* Consume a Mercury declaration.
* Return the number of bytes consumed, or 0 if there was an error.
*
* A Mercury declaration must be one of:
* :- type
* :- solver type
* :- pred
* :- func
* :- inst
* :- mode
* :- typeclass
* :- instance
* :- pragma
* :- promise
* :- initialise
* :- finalise
* :- mutable
* :- module
* :- interface
* :- implementation
* :- import_module
* :- use_module
* :- include_module
* :- end_module
* followed on the same line by an alphanumeric sequence, starting with a lower
* case letter or by a single-quoted arbitrary string.
* Single quotes can escape themselves. Backslash quotes everything.
*
* Return the size of the name of the declaration or 0 if no header was found.
* As quantifiers may precede functions or predicates, we must list them too.
*/
static const char *Mercury_decl_tags[] = {"type", "solver type", "pred",
"func", "inst", "mode", "typeclass", "instance", "pragma", "promise",
"initialise", "finalise", "mutable", "module", "interface", "implementation",
"import_module", "use_module", "include_module", "end_module", "some", "all"};
static size_t
mercury_decl (char *s, size_t pos)
{
if (s == NULL) return 0;
size_t origpos;
origpos = pos;
while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_')) ++pos;
unsigned char decl_type_length = pos - origpos;
char buf[decl_type_length + 1];
memset (buf, 0, decl_type_length + 1);
/* Mercury declaration tags. Consume them, then check the declaration item
following :- is legitimate, then go on as in the prolog case. */
memcpy (buf, &s[origpos], decl_type_length);
bool found_decl_tag = false;
if (is_mercury_quantifier)
{
if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax. */
return 0;
is_mercury_quantifier = false; /* Beset to base value. */
found_decl_tag = true;
}
else
{
for (int j = 0; j < sizeof (Mercury_decl_tags) / sizeof (char*); ++j)
{
if (strcmp (buf, Mercury_decl_tags[j]) == 0)
{
found_decl_tag = true;
if (strcmp (buf, "type") == 0)
is_mercury_type = true;
if (strcmp (buf, "some") == 0
|| strcmp (buf, "all") == 0)
{
is_mercury_quantifier = true;
}
break; /* Found declaration tag of rank j. */
}
else
/* 'solver type' has a blank in the middle,
so this is the hard case. */
if (strcmp (buf, "solver") == 0)
{
++pos;
while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_'))
++pos;
decl_type_length = pos - origpos;
char buf2[decl_type_length + 1];
memset (buf2, 0, decl_type_length + 1);
memcpy (buf2, &s[origpos], decl_type_length);
if (strcmp (buf2, "solver type") == 0)
{
found_decl_tag = false;
break; /* Found declaration tag of rank j. */
}
}
}
}
/* If with_mercury_definitions == false
* this is a Mercury syntax error, ignoring... */
if (with_mercury_definitions)
{
if (found_decl_tag)
pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */
else
/* Prolog-like behavior
* we have parsed the predicate once, yet inappropriately
* so restarting again the parsing step. */
pos = 0;
}
else
{
if (found_decl_tag)
pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */
else
return 0;
}
/* From now on it is the same as for Prolog except for module dots. */
if (c_islower (s[pos]) || s[pos] == '_' )
{
/* The name is unquoted.
Do not confuse module dots with end-of-declaration dots. */
while (c_isalnum (s[pos])
|| s[pos] == '_'
|| (s[pos] == '.' /* A module dot. */
&& s + pos + 1 != NULL
&& (c_isalnum (s[pos + 1]) || s[pos + 1] == '_')))
++pos;
return pos - origpos;
}
else if (s[pos] == '\'')
{
++pos;
for (;;)
{
if (s[pos] == '\'')
{
++pos;
if (s[pos] != '\'')
break;
++pos; /* A double quote. */
}
else if (s[pos] == '\0') /* Multiline quoted atoms are ignored. */
return 0;
else if (s[pos] == '\\')
{
if (s[pos+1] == '\0')
return 0;
pos += 2;
}
else
++pos;
}
return pos - origpos;
}
else if (is_mercury_quantifier && s[pos] == '[') /* :- some [T] pred/func. */
{
for (++pos; s + pos != NULL && s[pos] != ']'; ++pos) {}
if (s + pos == NULL) return 0;
++pos;
pos = skip_spaces (s + pos) - s;
return mercury_decl (s, pos) + pos - origpos;
}
else
return 0;
}
static ptrdiff_t
mercury_pr (char *s, char *last, ptrdiff_t lastlen)
{
size_t len0 = 0;
is_mercury_type = false;
is_mercury_quantifier = false;
if (is_mercury_declaration)
{
/* Skip len0 blanks only for declarations. */
len0 = skip_spaces (s + 2) - s;
}
size_t len = mercury_decl (s , len0);
if (len == 0) return 0;
len += len0;
if (( (s[len] == '.' /* This is a statement dot, not a module dot. */
|| (s[len] == '(' && (len += 1))
|| (s[len] == ':' /* Stopping in case of a rule. */
&& s[len + 1] == '-'
&& (len += 2)))
&& (lastlen != len || memcmp (s, last, len) != 0)
)
/* Types are often declared on several lines so keeping just
the first line. */
|| is_mercury_type)
{
make_tag (s, 0, true, s, len, lineno, linecharno);
return len;
}
return 0;
}
/*
* Support for Erlang

View file

@ -16,6 +16,7 @@ HTMLSRC=$(addprefix ./html-src/,softwarelibero.html index.shtml algrthms.html so
#JAVASRC=$(addprefix ./java-src/, )
LUASRC=$(addprefix ./lua-src/,allegro.lua test.lua)
MAKESRC=$(addprefix ./make-src/,Makefile)
MERCSRC=$(addprefix ./merc-src/,accumulator.m)
OBJCSRC=$(addprefix ./objc-src/,Subprocess.h Subprocess.m PackInsp.h PackInsp.m)
OBJCPPSRC=$(addprefix ./objcpp-src/,SimpleCalc.H SimpleCalc.M)
PASSRC=$(addprefix ./pas-src/,common.pas)
@ -32,7 +33,7 @@ YSRC=$(addprefix ./y-src/,parse.y parse.c atest.y cccp.c cccp.y)
SRCS=${ADASRC} ${ASRC} ${CSRC} ${CPSRC} ${ELSRC} ${ERLSRC} ${FSRC}\
${FORTHSRC} ${GOSRC} ${HTMLSRC} ${JAVASRC} ${LUASRC} ${MAKESRC}\
${OBJCSRC} ${OBJCPPSRC} ${PASSRC} ${PHPSRC} ${PERLSRC} ${PSSRC}\
${PROLSRC} ${PYTSRC} ${RBSRC} ${RSSRC} ${SCMSRC} ${TEXSRC} ${YSRC}
${PROLSRC} ${PYTSRC} ${RBSRC} ${RSSRC} ${SCMSRC} ${TEXSRC} ${YSRC} ${MERCSRC}
NONSRCS=./f-src/entry.strange ./erl-src/lists.erl ./cp-src/clheir.hpp.gz
ETAGS_PROG=../../../lib-src/etags

File diff suppressed because it is too large Load diff