1 /* Generate doc-string file for XEmacs from source files.
2 Copyright (C) 1985, 1986, 1992, 1993, 1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Board of Trustees, University of Illinois.
4 Copyright (C) 1998, 1999 J. Kean Johnston.
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 19.30. */
25 /* The arguments given to this program are all the C and Lisp source files
26 of XEmacs. .elc and .el and .c files are allowed.
27 A .o file can also be specified; the .c file it was made from is used.
28 This helps the makefile pass the correct list of files.
30 The results, which go to standard output or to a file
31 specified with -a or -o (-a to append, -o to start from nothing),
32 are entries containing function or variable names and their documentation.
33 Each entry starts with a ^_ character.
34 Then comes F for a function or V for a variable.
35 Then comes the function or variable name, terminated with a newline.
36 Then comes the documentation for that function or variable.
38 Added 19.15/20.1: `-i site-packages' allows the installer to dump extra packages
39 without modifying Makefiles, etc.
42 #define NO_SHORTNAMES /* Tell config not to load remap.h */
47 #if __STDC__ || defined(STDC_HEADERS)
64 #endif /* WIN32_NATIVE */
67 #include <sys/param.h>
68 #endif /* not WIN32_NATIVE */
70 #if defined(WIN32_NATIVE) || defined(CYGWIN)
71 #define READ_TEXT "rt"
72 #define READ_BINARY "rb"
73 #define WRITE_BINARY "wb"
74 #define APPEND_BINARY "ab"
75 #else /* not WIN32_NATIVE */
77 #define READ_BINARY "r"
78 #define WRITE_BINARY "w"
79 #define APPEND_BINARY "a"
80 #endif /* not WIN32_NATIVE */
82 /* Stdio stream for output to the DOC file. */
92 static int scan_file (const char *filename);
93 static int read_c_string (FILE *, int, int);
94 static void write_c_args (FILE *out, const char *func, char *buf, int minargs,
96 static int scan_c_file (const char *filename, const char *mode);
97 static void skip_white (FILE *);
98 static void read_lisp_symbol (FILE *, char *);
99 static int scan_lisp_file (const char *filename, const char *mode);
101 #define C_IDENTIFIER_CHAR_P(c) \
102 (('A' <= c && c <= 'Z') || \
103 ('a' <= c && c <= 'z') || \
104 ('0' <= c && c <= '9') || \
107 /* Name this program was invoked with. */
110 /* Set to 1 if this was invoked by ellcc */
113 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
116 error (const char *s1, const char *s2)
118 fprintf (stderr, "%s: ", progname);
119 fprintf (stderr, s1, s2);
120 fprintf (stderr, "\n");
123 /* Print error message and exit. */
126 fatal (const char *s1, const char *s2)
132 /* Like malloc but get fatal error if memory is exhausted. */
135 xmalloc (unsigned int size)
137 long *result = (long *) malloc (size);
139 fatal ("virtual memory exhausted", 0);
144 next_extra_elc(char *extra_elcs)
146 static FILE *fp = NULL;
147 static char line_buf[BUFSIZ];
148 char *p = line_buf+1;
153 } else if (!(fp = fopen(extra_elcs, READ_BINARY))) {
154 /* It is not an error if this file doesn't exist. */
155 /*fatal("error opening site package file list", 0);*/
158 fgets(line_buf, BUFSIZ, fp);
162 if (!fgets(line_buf, BUFSIZ, fp)) {
168 if (strlen(p) <= 2 || strlen(p) >= (BUFSIZ - 5)) {
169 /* reject too short or too long lines */
172 p[strlen(p) - 2] = '\0';
180 main (int argc, char **argv)
185 char *extra_elcs = NULL;
191 /* Don't put CRs in the DOC file. */
194 _setmode (fileno (stdout), O_BINARY);
195 #endif /* WIN32_NATIVE */
197 /* If first two args are -o FILE, output to FILE. */
199 if (argc > i + 1 && !strcmp (argv[i], "-o"))
201 outfile = fopen (argv[i + 1], WRITE_BINARY);
204 if (argc > i + 1 && !strcmp (argv[i], "-a"))
206 outfile = fopen (argv[i + 1], APPEND_BINARY);
209 if (argc > i + 1 && !strcmp (argv[i], "-E"))
211 outfile = fopen (argv[i + 1], APPEND_BINARY);
215 if (argc > i + 1 && !strcmp (argv[i], "-d"))
221 if (argc > (i + 1) && !strcmp(argv[i], "-i")) {
222 extra_elcs = argv[i + 1];
227 fatal ("No output file specified", "");
230 fprintf (outfile, "{\n");
233 for (; i < argc; i++)
236 /* Don't process one file twice. */
237 for (j = first_infile; j < i; j++)
238 if (! strcmp (argv[i], argv[j]))
241 /* err_count seems to be {mis,un}used */
242 err_count += scan_file (argv[i]);
248 while ((p = next_extra_elc(extra_elcs)) != NULL) {
249 err_count += scan_file(p);
253 putc ('\n', outfile);
255 fprintf (outfile, "}\n\n");
257 exit (err_count > 0);
259 return err_count > 0;
262 /* Read file FILENAME and output its doc strings to outfile. */
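263 /* Returns the result of scan_lisp_file or scan_c_file. NOTE(review): an input file that cannot be opened appears to be treated as "no error" (0), not 1 -- confirm. */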
266 scan_file (const char *filename)
268 int len = strlen (filename);
269 if (ellcc == 0 && len > 4 && !strcmp (filename + len - 4, ".elc"))
271 Current_file_type = elc_file;
272 return scan_lisp_file (filename, READ_BINARY);
274 else if (ellcc == 0 && len > 3 && !strcmp (filename + len - 3, ".el"))
276 Current_file_type = el_file;
277 return scan_lisp_file (filename, READ_TEXT);
281 Current_file_type = c_file;
282 return scan_c_file (filename, READ_TEXT);
288 /* Skip a C string from INFILE,
289 and return the character that follows the closing ".
290 If printflag is positive, output string contents to outfile.
291 If it is negative, store contents in buf.
292 Convert escape sequences \n and \t to newline and tab;
293 discard \ followed by newline. */
295 #define MDGET do { prevc = c; c = getc (infile); } while (0)
297 read_c_string (FILE *infile, int printflag, int c_docstring)
299 register int prevc = 0, c = 0;
306 while ((c_docstring || c != '"') && c != EOF)
310 int cc = getc (infile);
318 fprintf (outfile, "\\n\\");
319 putc ('\n', outfile);
321 else if (printflag < 0)
335 fprintf (outfile, "\\n\\");
336 putc ('\n', outfile);
338 else if (printflag < 0)
351 if (!c_docstring && c == 'n')
363 if (ellcc && c == '"')
364 putc ('\\', outfile);
367 else if (printflag < 0)
372 /* look for continuation of string */
373 if (Current_file_type == c_file)
388 /* If we had a "", concatenate the two strings. */
399 /* Write to file OUT the argument names of function FUNC, whose text is in BUFF.
400 MINARGS and MAXARGS are the minimum and maximum number of arguments. */
403 write_c_args (FILE *out, const char *func, char *buff, int minargs,
412 fprintf (out, "(%s", func);
414 /* XEmacs - "arguments:" is for parsing the docstring. FSF's help system
415 doesn't parse the docstring for arguments like we do, so we're also
416 going to omit the function name to preserve compatibility with elisp
417 that parses the docstring. Finally, not prefixing the arglist with
418 anything is asking for trouble because it's not uncommon to have an
419 unescaped parenthesis at the beginning of a line. --Stig */
420 fprintf (out, "arguments: (");
426 for (p = buff; *p; p++)
431 /* Add support for ANSI prototypes. Hop over
432 "Lisp_Object" string (the only C type allowed in DEFUNs) */
433 static char lo[] = "Lisp_Object";
434 if ((C_IDENTIFIER_CHAR_P (c) != in_ident) && !in_ident &&
435 (strncmp (p, lo, sizeof (lo) - 1) == 0) &&
436 isspace((unsigned char) (* (p + sizeof (lo) - 1))))
438 p += (sizeof (lo) - 1);
439 while (isspace ((unsigned char) (*p)))
444 /* Notice when we start printing a new identifier. */
445 if (C_IDENTIFIER_CHAR_P (c) != in_ident)
452 /* XEmacs - This goes along with the change above. */
456 if (minargs == 0 && maxargs > 0)
457 fprintf (out, "&optional ");
467 /* Print the C argument list as it would appear in lisp:
468 print underscores as hyphens, and print commas as spaces.
469 Collapse adjacent spaces into one. */
470 if (c == '_') c = '-';
471 if (c == ',') c = ' ';
473 /* If the C argument name ends with `_', change it to ' ',
474 to allow use of C reserved words or global symbols as Lisp args. */
475 if (c == '-' && ! C_IDENTIFIER_CHAR_P (p[1]))
480 /* If the character is carriage return, escape it for the C compiler. */
486 else if (c != ' ' || ! just_spaced)
488 if (c >= 'a' && c <= 'z')
489 /* Upcase the letter. */
494 just_spaced = (c == ' ');
500 putc ('\n', out); /* XEmacs addition */
503 /* Read through a c file. If a .o file is named,
504 the corresponding .c file is read instead.
505 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
506 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
509 scan_c_file (const char *filename, const char *mode)
514 register int defunflag;
515 register int defvarperbufferflag = 0;
516 register int defvarflag;
517 int minargs, maxargs;
518 size_t l = strlen (filename);
524 errno = ENAMETOOLONG;
531 strcpy (f, filename);
534 infile = fopen (f, mode);
536 /* No error if the input file does not exist */
544 while (!feof (infile))
581 /* Note that this business doesn't apply under XEmacs.
582 DEFVAR_BUFFER_LOCAL in XEmacs behaves normally. */
583 defvarperbufferflag = (c == 'P');
596 defunflag = (c == 'U');
612 c = read_c_string (infile, -1, 0);
616 else if (defvarperbufferflag)
620 else /* For DEFSIMPLE and DEFPRED */
628 if (defunflag && (commas == 1 || commas == 2))
632 while (c == ' ' || c == '\n' || c == '\t')
637 if (commas == 2) /* pick up minargs */
638 fscanf (infile, "%d", &minargs);
639 else /* pick up maxargs */
640 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
643 fscanf (infile, "%d", &maxargs);
650 while (c == ' ' || c == '\n' || c == '\t')
653 c = read_c_string (infile, 0, 0);
654 if (defunflag | defvarflag)
668 while (c == ' ' || c == '\n' || c == '\t')
670 if (defunflag | defvarflag)
673 if (defunflag || defvarflag || c == '"')
676 fprintf (outfile, " CDOC%s(\"%s\", \"\\\n",
677 defvarflag ? "SYM" : "SUBR", buf);
681 putc (defvarflag ? 'V' : 'F', outfile);
682 fprintf (outfile, "%s\n", buf);
684 c = read_c_string (infile, 1, (defunflag || defvarflag));
686 /* If this is a defun, find the arguments and print them. If
687 this function takes MANY or UNEVALLED args, then the C source
688 won't give the names of the arguments, so we shouldn't bother
689 trying to find them. */
690 if (defunflag && maxargs != -1)
692 char argbuf[1024], *p = argbuf;
693 #if 0 /* For old DEFUN's only */
701 /* Skip into arguments. */
708 /* Copy arguments into ARGBUF. */
711 *p++ = c = getc (infile);
716 fprintf (outfile, "\\n\\\n\\n\\\n");
718 fprintf (outfile, "\n\n");
719 write_c_args (outfile, buf, argbuf, minargs, maxargs);
722 fprintf (outfile, "\\n\");\n\n");
730 /* Read a file of Lisp code, compiled or interpreted.
732 (defun NAME ARGS DOCSTRING ...)
733 (defmacro NAME ARGS DOCSTRING ...)
734 (autoload (quote NAME) FILE DOCSTRING ...)
735 (defvar NAME VALUE DOCSTRING)
736 (defconst NAME VALUE DOCSTRING)
737 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
738 (fset (quote NAME) #[... DOCSTRING ...])
739 (defalias (quote NAME) #[... DOCSTRING ...])
740 starting in column zero.
741 (quote NAME) may appear as 'NAME as well.
743 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
744 When we find that, we save it for the following defining-form,
745 and we use that instead of reading a doc string within that defining-form.
747 For defun, defmacro, and autoload, we know how to skip over the arglist.
748 For defvar, defconst, and fset we skip to the docstring with a kludgy
749 formatting convention: all docstrings must appear on the same line as the
750 initial open-paren (the one in column zero) and must contain a backslash
751 and a double-quote immediately after the initial double-quote. No newlines
752 must appear between the beginning of the form and the first double-quote.
753 The only source file that must follow this convention is loaddefs.el; aside
754 from that, it is always the .elc file that we look at, and they are no
755 problem because byte-compiler output follows this convention.
756 The NAME and DOCSTRING are output.
757 NAME is preceded by `F' for a function or `V' for a variable.
758 An entry is output only if DOCSTRING has \ newline just after the opening "
762 skip_white (FILE *infile)
765 while (c == ' ' || c == '\t' || c == '\n')
771 read_lisp_symbol (FILE *infile, char *buffer)
774 char *fillp = buffer;
781 /* FSF has *(++fillp), which is wrong. */
782 *fillp++ = getc (infile);
783 else if (c == ' ' || c == '\t' || c == '\n' || c == '(' || c == ')')
794 fprintf (stderr, "## expected a symbol, got '%c'\n", c);
800 scan_lisp_file (const char *filename, const char *mode)
804 char *saved_string = 0;
806 infile = fopen (filename, mode);
810 return 0; /* No error */
814 while (!feof (infile))
825 /* Detect a dynamic doc string and save it for the next expression. */
834 /* Read the length. */
835 while ((c = getc (infile),
836 c >= '0' && c <= '9'))
842 /* The next character is a space that is counted in the length
843 but not part of the doc string.
844 We already read it, so just ignore it. */
847 /* Read in the contents. */
848 if (saved_string != 0)
850 saved_string = (char *) xmalloc (length);
851 for (i = 0; i < length; i++)
852 saved_string[i] = getc (infile);
853 /* The last character is a ^_.
854 That is needed in the .elc file
855 but it is redundant in DOC. So get rid of it here. */
856 saved_string[length - 1] = 0;
857 /* Skip the newline. */
868 read_lisp_symbol (infile, buffer);
870 if (! strcmp (buffer, "defun") ||
871 ! strcmp (buffer, "defmacro"))
874 read_lisp_symbol (infile, buffer);
876 /* Skip the arguments: either "nil" or a list in parens */
879 if (c == 'n') /* nil */
881 if ((c = getc (infile)) != 'i' ||
882 (c = getc (infile)) != 'l')
884 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
891 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
900 /* If the next three characters aren't `dquote bslash newline'
901 then we're not reading a docstring.
903 if ((c = getc (infile)) != '"' ||
904 (c = getc (infile)) != '\\' ||
905 (c = getc (infile)) != '\n')
908 fprintf (stderr, "## non-docstring in %s (%s)\n",
915 else if (! strcmp (buffer, "defvar") ||
916 ! strcmp (buffer, "defconst"))
920 read_lisp_symbol (infile, buffer);
922 if (saved_string == 0)
925 /* Skip until the first newline; remember the two previous chars. */
926 while (c != '\n' && c >= 0)
928 /* #### Kludge -- Ignore any ESC x x ISO2022 sequences */
942 /* If two previous characters were " and \,
943 this is a doc string. Otherwise, there is none. */
944 if (c2 != '"' || c1 != '\\')
947 fprintf (stderr, "## non-docstring in %s (%s)\n",
955 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
962 read_lisp_symbol (infile, buffer);
967 fprintf (stderr, "## unparsable name in fset in %s\n",
971 read_lisp_symbol (infile, buffer);
972 if (strcmp (buffer, "quote"))
974 fprintf (stderr, "## unparsable name in fset in %s\n",
978 read_lisp_symbol (infile, buffer);
983 "## unparsable quoted name in fset in %s\n",
989 if (saved_string == 0)
991 /* Skip until the first newline; remember the two previous chars. */
992 while (c != '\n' && c >= 0)
999 /* If two previous characters were " and \,
1000 this is a doc string. Otherwise, there is none. */
1001 if (c2 != '"' || c1 != '\\')
1004 fprintf (stderr, "## non-docstring in %s (%s)\n",
1012 else if (! strcmp (buffer, "autoload"))
1017 read_lisp_symbol (infile, buffer);
1022 fprintf (stderr, "## unparsable name in autoload in %s\n",
1026 read_lisp_symbol (infile, buffer);
1027 if (strcmp (buffer, "quote"))
1029 fprintf (stderr, "## unparsable name in autoload in %s\n",
1033 read_lisp_symbol (infile, buffer);
1038 "## unparsable quoted name in autoload in %s\n",
1043 skip_white (infile);
1044 if ((c = getc (infile)) != '\"')
1046 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1050 read_c_string (infile, 0, 0);
1051 skip_white (infile);
1053 if (saved_string == 0)
1055 /* If the next three characters aren't `dquote bslash newline'
1056 then we're not reading a docstring. */
1057 if ((c = getc (infile)) != '"' ||
1058 (c = getc (infile)) != '\\' ||
1059 (c = getc (infile)) != '\n')
1062 fprintf (stderr, "## non-docstring in %s (%s)\n",
1070 #if 0 /* causes crash */
1071 else if (! strcmp (buffer, "if") ||
1072 ! strcmp (buffer, "byte-code"))
1079 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1085 /* At this point, we should either use the previous
1086 dynamic doc string in saved_string
1087 or gobble a doc string from the input file.
1089 In the latter case, the opening quote (and leading
1090 backslash-newline) have already been read. */
1091 putc ('\n', outfile); /* XEmacs addition */
1092 putc (037, outfile);
1093 putc (type, outfile);
1094 fprintf (outfile, "%s\n", buffer);
1097 fputs (saved_string, outfile);
1098 /* Don't use one dynamic doc string twice. */
1099 free (saved_string);
1103 read_c_string (infile, 1, 0);