mirror of
git://git.sv.gnu.org/emacs.git
synced 2025-12-05 22:20:24 -08:00
Add support for Mercury (https://mercurylang.org) in 'etags'
Tag declarations starting lines with ':-'. By default, all declarations are tagged. Optionally, first predicate or functions in clauses can be tagged as in Prolog support using '--declarations'. (Bug#47408). * lib-src/etags.c (test_objc_is_mercury, Mercury_functions) (mercury_skip_comment, mercury_decl, mercury_pr): Implement Mercury support. As Mercury and Objective-C have the same file extension .m, a heuristic test tries to detect the language. * doc/man/etags.1: Document the change. Add Mercury-specific behavior for '--declarations'. This option tags first predicates or functions in clauses in addition to declarations.
This commit is contained in:
parent
2207f9adcc
commit
5a8a5a990a
5 changed files with 2464 additions and 13 deletions
490
lib-src/etags.c
490
lib-src/etags.c
|
|
@ -142,7 +142,14 @@ University of California, as described above. */
|
|||
# define CTAGS false
|
||||
#endif
|
||||
|
||||
/* Copy to DEST from SRC (containing LEN bytes), and append a NUL byte. */
|
||||
/* Define MERCURY_HEURISTICS_RATIO as it was necessary to disambiguate
|
||||
Mercury from Objective C, which have same file extensions .m
|
||||
See comments before function test_objc_is_mercury for details. */
|
||||
#ifndef MERCURY_HEURISTICS_RATIO
|
||||
# define MERCURY_HEURISTICS_RATIO 0.5
|
||||
#endif
|
||||
|
||||
/* COPY to DEST from SRC (containing LEN bytes), and append a NUL byte. */
|
||||
static void
|
||||
memcpyz (void *dest, void const *src, ptrdiff_t len)
|
||||
{
|
||||
|
|
@ -359,6 +366,7 @@ static void HTML_labels (FILE *);
|
|||
static void Lisp_functions (FILE *);
|
||||
static void Lua_functions (FILE *);
|
||||
static void Makefile_targets (FILE *);
|
||||
static void Mercury_functions (FILE *);
|
||||
static void Pascal_functions (FILE *);
|
||||
static void Perl_functions (FILE *);
|
||||
static void PHP_functions (FILE *);
|
||||
|
|
@ -379,6 +387,7 @@ static ptrdiff_t readline_internal (linebuffer *, FILE *, char const *);
|
|||
static bool nocase_tail (const char *);
|
||||
static void get_tag (char *, char **);
|
||||
static void get_lispy_tag (char *);
|
||||
static void test_objc_is_mercury (char *, language **);
|
||||
|
||||
static void analyze_regex (char *);
|
||||
static void free_regexps (void);
|
||||
|
|
@ -684,10 +693,22 @@ static const char Makefile_help [] =
|
|||
"In makefiles, targets are tags; additionally, variables are tags\n\
|
||||
unless you specify '--no-globals'.";
|
||||
|
||||
/* Mercury and Objective C share the same .m file extensions. */
|
||||
static const char *Mercury_suffixes [] =
|
||||
{"m",
|
||||
NULL};
|
||||
static const char Mercury_help [] =
|
||||
"In Mercury code, tags are all declarations beginning a line with ':-'\n\
|
||||
and optionally Prolog-like definitions (first rule for a predicate or \
|
||||
function).\n\
|
||||
To enable this behavior, run etags using --declarations.";
|
||||
static bool with_mercury_definitions = false;
|
||||
float mercury_heuristics_ratio = MERCURY_HEURISTICS_RATIO;
|
||||
|
||||
static const char *Objc_suffixes [] =
|
||||
{ "lm", /* Objective lex file */
|
||||
"m", /* Objective C file */
|
||||
NULL };
|
||||
{ "lm", /* Objective lex file */
|
||||
"m", /* By default, Objective C file will be assumed. */
|
||||
NULL};
|
||||
static const char Objc_help [] =
|
||||
"In Objective C code, tags include Objective C definitions for classes,\n\
|
||||
class categories, methods and protocols. Tags for variables and\n\
|
||||
|
|
@ -831,7 +852,9 @@ static language lang_names [] =
|
|||
{ "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
|
||||
{ "lua", Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters},
|
||||
{ "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
|
||||
/* objc listed before mercury as it is a better default for .m extensions. */
|
||||
{ "objc", Objc_help, plain_C_entries, Objc_suffixes },
|
||||
{ "mercury", Mercury_help, Mercury_functions, Mercury_suffixes },
|
||||
{ "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
|
||||
{ "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
|
||||
{ "php", PHP_help, PHP_functions, PHP_suffixes },
|
||||
|
|
@ -958,6 +981,9 @@ Relative ones are stored relative to the output file's directory.\n");
|
|||
puts
|
||||
("\tand create tags for extern variables unless --no-globals is used.");
|
||||
|
||||
puts ("In Mercury, tag both declarations starting a line with ':-' and first\n\
|
||||
predicates or functions in clauses.");
|
||||
|
||||
if (CTAGS)
|
||||
puts ("-d, --defines\n\
|
||||
Create tag entries for C #define constants and enum constants, too.");
|
||||
|
|
@ -1783,6 +1809,11 @@ find_entries (FILE *inf)
|
|||
if (parser == NULL)
|
||||
{
|
||||
lang = get_language_from_filename (curfdp->infname, true);
|
||||
|
||||
/* Disambiguate file names between Objc and Mercury */
|
||||
if (lang != NULL && strcmp (lang->name, "objc") == 0)
|
||||
test_objc_is_mercury (curfdp->infname, &lang);
|
||||
|
||||
if (lang != NULL && lang->function != NULL)
|
||||
{
|
||||
curfdp->lang = lang;
|
||||
|
|
@ -6070,6 +6101,457 @@ prolog_atom (char *s, size_t pos)
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Support for Mercury
|
||||
*
|
||||
* Assumes that the declarationa starts at column 0.
|
||||
* Original code by Sunichirou Sugou (1989) for Prolog.
|
||||
* Rewritten by Anders Lindgren (1996) for Prolog.
|
||||
* Adapted by Fabrice Nicol (2021) for Mercury.
|
||||
* Note: Prolog-support behavior is preserved if
|
||||
* --declarations is used, corresponding to
|
||||
* with_mercury_definitions=true.
|
||||
*/
|
||||
|
||||
static ptrdiff_t mercury_pr (char *, char *, ptrdiff_t);
|
||||
static void mercury_skip_comment (linebuffer *, FILE *);
|
||||
static bool is_mercury_type = false;
|
||||
static bool is_mercury_quantifier = false;
|
||||
static bool is_mercury_declaration = false;
|
||||
|
||||
/*
|
||||
* Objective-C and Mercury have identical file extension .m
|
||||
* To disambiguate between Objective C and Mercury, parse file
|
||||
* with the following heuristics hook:
|
||||
* - if line starts with :- choose Mercury unconditionally,
|
||||
* - if line starts with #, @, choose Objective-C,
|
||||
* - otherwise compute the following ratio:
|
||||
*
|
||||
* r = (number of lines with :-
|
||||
* or % in non-commented parts or . at trimmed EOL)
|
||||
* / (number of lines - number of lines starting by any amount
|
||||
* of whitespace, optionally followed by comment(s))
|
||||
*
|
||||
* Note: strings are neglected in counts.
|
||||
*
|
||||
* If r > mercury_heuristics_ratio, choose Mercury.
|
||||
* Experimental tests show that a possibly optimal default value for
|
||||
* this floor value is around 0.5. This is the default value for
|
||||
* MERCURY_HEURISTICS_RATIO, defined in the first lines of this file.
|
||||
* The closer r to 0.5, the closer the source code to pure Prolog.
|
||||
* Idiomatic Mercury is scored either with r = 1.0 or higher.
|
||||
* Objective-C is scored with r = 0.0. When this fails, the r-score never
|
||||
* rose above 0.1 in Objective-C tests.
|
||||
*/
|
||||
|
||||
static void
|
||||
test_objc_is_mercury (char *this_file, language **lang)
|
||||
{
|
||||
if (this_file == NULL) return;
|
||||
FILE* fp = fopen (this_file, "r");
|
||||
if (fp == NULL)
|
||||
pfatal (this_file);
|
||||
|
||||
bool blank_line = false; /* Line starting with any amount of white space
|
||||
followed by optional comment(s). */
|
||||
bool commented_line = false;
|
||||
bool found_dot = false;
|
||||
bool only_space_before = true;
|
||||
bool start_of_line = true;
|
||||
int c;
|
||||
intmax_t lines = 1;
|
||||
intmax_t mercury_dots = 0;
|
||||
intmax_t percentage_signs = 0;
|
||||
intmax_t rule_signs = 0;
|
||||
float ratio = 0;
|
||||
|
||||
while ((c = fgetc (fp)) != EOF)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case '\n':
|
||||
if (! blank_line) ++lines;
|
||||
blank_line = true;
|
||||
commented_line = false;
|
||||
start_of_line = true;
|
||||
if (found_dot) ++mercury_dots;
|
||||
found_dot = false;
|
||||
only_space_before = true;
|
||||
break;
|
||||
case '.':
|
||||
found_dot = ! commented_line;
|
||||
only_space_before = false;
|
||||
break;
|
||||
case '%': /* More frequent in Mercury. May be modulo in Obj.-C. */
|
||||
if (! commented_line)
|
||||
{
|
||||
++percentage_signs;
|
||||
/* Cannot tell if it is a comment or modulo yet for sure.
|
||||
Yet works for heuristic purposes. */
|
||||
commented_line = true;
|
||||
}
|
||||
found_dot = false;
|
||||
start_of_line = false;
|
||||
only_space_before = false;
|
||||
break;
|
||||
case '/':
|
||||
{
|
||||
int d = fgetc (fp);
|
||||
found_dot = false;
|
||||
only_space_before = false;
|
||||
if (! commented_line)
|
||||
{
|
||||
if (d == '*')
|
||||
commented_line = true;
|
||||
else
|
||||
/* If d == '/', cannot tell if it is an Obj.-C comment:
|
||||
may be Mercury integ. division. */
|
||||
blank_line = false;
|
||||
}
|
||||
}
|
||||
FALLTHROUGH;
|
||||
case ' ':
|
||||
case '\t':
|
||||
start_of_line = false;
|
||||
break;
|
||||
case ':':
|
||||
c = fgetc (fp);
|
||||
if (start_of_line)
|
||||
{
|
||||
if (c == '-')
|
||||
{
|
||||
ratio = 1.0; /* Failsafe, not an operator in Obj.-C. */
|
||||
goto out;
|
||||
}
|
||||
start_of_line = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* p :- q. Frequent in Mercury.
|
||||
Rare or in quoted exprs in Obj.-C. */
|
||||
if (c == '-' && ! commented_line)
|
||||
++rule_signs;
|
||||
}
|
||||
blank_line = false;
|
||||
found_dot = false;
|
||||
only_space_before = false;
|
||||
break;
|
||||
case '@':
|
||||
case '#':
|
||||
if (start_of_line || only_space_before)
|
||||
{
|
||||
ratio = 0.0;
|
||||
goto out;
|
||||
}
|
||||
FALLTHROUGH;
|
||||
default:
|
||||
start_of_line = false;
|
||||
blank_line = false;
|
||||
found_dot = false;
|
||||
only_space_before = false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fallback heuristic test. Not failsafe but errless in pratice. */
|
||||
ratio = ((float) rule_signs + percentage_signs + mercury_dots) / lines;
|
||||
|
||||
out:
|
||||
if (fclose (fp) == EOF)
|
||||
pfatal (this_file);
|
||||
|
||||
if (ratio > mercury_heuristics_ratio)
|
||||
{
|
||||
/* Change the language from Objective C to Mercury. */
|
||||
static language lang0 = { "mercury", Mercury_help, Mercury_functions,
|
||||
Mercury_suffixes };
|
||||
*lang = &lang0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
Mercury_functions (FILE *inf)
|
||||
{
|
||||
char *cp, *last = NULL;
|
||||
ptrdiff_t lastlen = 0, allocated = 0;
|
||||
if (declarations) with_mercury_definitions = true;
|
||||
|
||||
LOOP_ON_INPUT_LINES (inf, lb, cp)
|
||||
{
|
||||
if (cp[0] == '\0') /* Empty line. */
|
||||
continue;
|
||||
else if (c_isspace (cp[0]) || cp[0] == '%')
|
||||
/* A Prolog-type comment or anything other than a declaration. */
|
||||
continue;
|
||||
else if (cp[0] == '/' && cp[1] == '*') /* Mercury C-type comment. */
|
||||
mercury_skip_comment (&lb, inf);
|
||||
else
|
||||
{
|
||||
is_mercury_declaration = (cp[0] == ':' && cp[1] == '-');
|
||||
|
||||
if (is_mercury_declaration
|
||||
|| with_mercury_definitions)
|
||||
{
|
||||
ptrdiff_t len = mercury_pr (cp, last, lastlen);
|
||||
if (0 < len)
|
||||
{
|
||||
/* Store the declaration to avoid generating duplicate
|
||||
tags later. */
|
||||
if (allocated <= len)
|
||||
{
|
||||
xrnew (last, len + 1, 1);
|
||||
allocated = len + 1;
|
||||
}
|
||||
memcpyz (last, cp, len);
|
||||
lastlen = len;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free (last);
|
||||
}
|
||||
|
||||
static void
|
||||
mercury_skip_comment (linebuffer *plb, FILE *inf)
|
||||
{
|
||||
char *cp;
|
||||
|
||||
do
|
||||
{
|
||||
for (cp = plb->buffer; *cp != '\0'; ++cp)
|
||||
if (cp[0] == '*' && cp[1] == '/')
|
||||
return;
|
||||
readline (plb, inf);
|
||||
}
|
||||
while (perhaps_more_input (inf));
|
||||
}
|
||||
|
||||
/*
|
||||
* A declaration is added if it matches:
|
||||
* <beginning of line>:-<whitespace><Mercury Term><whitespace>(
|
||||
* If with_mercury_definitions == true, we also add:
|
||||
* <beginning of line><Mercury item><whitespace>(
|
||||
* or <beginning of line><Mercury item><whitespace>:-
|
||||
* As for Prolog support, different arities and types are not taken into
|
||||
* consideration.
|
||||
* Item is added to the tags database if it doesn't match the
|
||||
* name of the previous declaration.
|
||||
*
|
||||
* Consume a Mercury declaration.
|
||||
* Return the number of bytes consumed, or 0 if there was an error.
|
||||
*
|
||||
* A Mercury declaration must be one of:
|
||||
* :- type
|
||||
* :- solver type
|
||||
* :- pred
|
||||
* :- func
|
||||
* :- inst
|
||||
* :- mode
|
||||
* :- typeclass
|
||||
* :- instance
|
||||
* :- pragma
|
||||
* :- promise
|
||||
* :- initialise
|
||||
* :- finalise
|
||||
* :- mutable
|
||||
* :- module
|
||||
* :- interface
|
||||
* :- implementation
|
||||
* :- import_module
|
||||
* :- use_module
|
||||
* :- include_module
|
||||
* :- end_module
|
||||
* followed on the same line by an alphanumeric sequence, starting with a lower
|
||||
* case letter or by a single-quoted arbitrary string.
|
||||
* Single quotes can escape themselves. Backslash quotes everything.
|
||||
*
|
||||
* Return the size of the name of the declaration or 0 if no header was found.
|
||||
* As quantifiers may precede functions or predicates, we must list them too.
|
||||
*/
|
||||
|
||||
static const char *Mercury_decl_tags[] = {"type", "solver type", "pred",
|
||||
"func", "inst", "mode", "typeclass", "instance", "pragma", "promise",
|
||||
"initialise", "finalise", "mutable", "module", "interface", "implementation",
|
||||
"import_module", "use_module", "include_module", "end_module", "some", "all"};
|
||||
|
||||
static size_t
|
||||
mercury_decl (char *s, size_t pos)
|
||||
{
|
||||
if (s == NULL) return 0;
|
||||
|
||||
size_t origpos;
|
||||
origpos = pos;
|
||||
|
||||
while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_')) ++pos;
|
||||
|
||||
unsigned char decl_type_length = pos - origpos;
|
||||
char buf[decl_type_length + 1];
|
||||
memset (buf, 0, decl_type_length + 1);
|
||||
|
||||
/* Mercury declaration tags. Consume them, then check the declaration item
|
||||
following :- is legitimate, then go on as in the prolog case. */
|
||||
|
||||
memcpy (buf, &s[origpos], decl_type_length);
|
||||
|
||||
bool found_decl_tag = false;
|
||||
|
||||
if (is_mercury_quantifier)
|
||||
{
|
||||
if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax. */
|
||||
return 0;
|
||||
is_mercury_quantifier = false; /* Beset to base value. */
|
||||
found_decl_tag = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int j = 0; j < sizeof (Mercury_decl_tags) / sizeof (char*); ++j)
|
||||
{
|
||||
if (strcmp (buf, Mercury_decl_tags[j]) == 0)
|
||||
{
|
||||
found_decl_tag = true;
|
||||
if (strcmp (buf, "type") == 0)
|
||||
is_mercury_type = true;
|
||||
|
||||
if (strcmp (buf, "some") == 0
|
||||
|| strcmp (buf, "all") == 0)
|
||||
{
|
||||
is_mercury_quantifier = true;
|
||||
}
|
||||
|
||||
break; /* Found declaration tag of rank j. */
|
||||
}
|
||||
else
|
||||
/* 'solver type' has a blank in the middle,
|
||||
so this is the hard case. */
|
||||
if (strcmp (buf, "solver") == 0)
|
||||
{
|
||||
++pos;
|
||||
while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_'))
|
||||
++pos;
|
||||
|
||||
decl_type_length = pos - origpos;
|
||||
char buf2[decl_type_length + 1];
|
||||
memset (buf2, 0, decl_type_length + 1);
|
||||
memcpy (buf2, &s[origpos], decl_type_length);
|
||||
|
||||
if (strcmp (buf2, "solver type") == 0)
|
||||
{
|
||||
found_decl_tag = false;
|
||||
break; /* Found declaration tag of rank j. */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If with_mercury_definitions == false
|
||||
* this is a Mercury syntax error, ignoring... */
|
||||
|
||||
if (with_mercury_definitions)
|
||||
{
|
||||
if (found_decl_tag)
|
||||
pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */
|
||||
else
|
||||
/* Prolog-like behavior
|
||||
* we have parsed the predicate once, yet inappropriately
|
||||
* so restarting again the parsing step. */
|
||||
pos = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (found_decl_tag)
|
||||
pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* From now on it is the same as for Prolog except for module dots. */
|
||||
|
||||
if (c_islower (s[pos]) || s[pos] == '_' )
|
||||
{
|
||||
/* The name is unquoted.
|
||||
Do not confuse module dots with end-of-declaration dots. */
|
||||
|
||||
while (c_isalnum (s[pos])
|
||||
|| s[pos] == '_'
|
||||
|| (s[pos] == '.' /* A module dot. */
|
||||
&& s + pos + 1 != NULL
|
||||
&& (c_isalnum (s[pos + 1]) || s[pos + 1] == '_')))
|
||||
++pos;
|
||||
|
||||
return pos - origpos;
|
||||
}
|
||||
else if (s[pos] == '\'')
|
||||
{
|
||||
++pos;
|
||||
for (;;)
|
||||
{
|
||||
if (s[pos] == '\'')
|
||||
{
|
||||
++pos;
|
||||
if (s[pos] != '\'')
|
||||
break;
|
||||
++pos; /* A double quote. */
|
||||
}
|
||||
else if (s[pos] == '\0') /* Multiline quoted atoms are ignored. */
|
||||
return 0;
|
||||
else if (s[pos] == '\\')
|
||||
{
|
||||
if (s[pos+1] == '\0')
|
||||
return 0;
|
||||
pos += 2;
|
||||
}
|
||||
else
|
||||
++pos;
|
||||
}
|
||||
return pos - origpos;
|
||||
}
|
||||
else if (is_mercury_quantifier && s[pos] == '[') /* :- some [T] pred/func. */
|
||||
{
|
||||
for (++pos; s + pos != NULL && s[pos] != ']'; ++pos) {}
|
||||
if (s + pos == NULL) return 0;
|
||||
++pos;
|
||||
pos = skip_spaces (s + pos) - s;
|
||||
return mercury_decl (s, pos) + pos - origpos;
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ptrdiff_t
|
||||
mercury_pr (char *s, char *last, ptrdiff_t lastlen)
|
||||
{
|
||||
size_t len0 = 0;
|
||||
is_mercury_type = false;
|
||||
is_mercury_quantifier = false;
|
||||
|
||||
if (is_mercury_declaration)
|
||||
{
|
||||
/* Skip len0 blanks only for declarations. */
|
||||
len0 = skip_spaces (s + 2) - s;
|
||||
}
|
||||
|
||||
size_t len = mercury_decl (s , len0);
|
||||
if (len == 0) return 0;
|
||||
len += len0;
|
||||
|
||||
if (( (s[len] == '.' /* This is a statement dot, not a module dot. */
|
||||
|| (s[len] == '(' && (len += 1))
|
||||
|| (s[len] == ':' /* Stopping in case of a rule. */
|
||||
&& s[len + 1] == '-'
|
||||
&& (len += 2)))
|
||||
&& (lastlen != len || memcmp (s, last, len) != 0)
|
||||
)
|
||||
/* Types are often declared on several lines so keeping just
|
||||
the first line. */
|
||||
|| is_mercury_type)
|
||||
{
|
||||
make_tag (s, 0, true, s, len, lineno, linecharno);
|
||||
return len;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Support for Erlang
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue