1 /* Generate doc-string file for XEmacs from source files.
2 Copyright (C) 1985, 1986, 1992, 1993, 1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Board of Trustees, University of Illinois.
4 Copyright (C) 1998, 1999 J. Kean Johnston.
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 19.30. */
25 /* The arguments given to this program are all the C and Lisp source files
26 of XEmacs. .elc and .el and .c files are allowed.
27 A .o file can also be specified; the .c file it was made from is used.
28 This helps the makefile pass the correct list of files.
30 The results, which go to standard output or to a file
31 specified with -a or -o (-a to append, -o to start from nothing),
32 are entries containing function or variable names and their documentation.
33 Each entry starts with a ^_ character.
34 Then comes F for a function or V for a variable.
35 Then comes the function or variable name, terminated with a newline.
36 Then comes the documentation for that function or variable.
38 Added 19.15/20.1: `-i site-packages' allows the installer to dump extra
39 packages without modifying Makefiles, etc.
42 #define NO_SHORTNAMES /* Tell config not to load remap.h */
47 #if __STDC__ || defined(STDC_HEADERS)
64 #endif /* WIN32_NATIVE */
67 #include <sys/param.h>
68 #endif /* not WIN32_NATIVE */
/* fopen mode strings used when opening the input and output files.
   WIN32_NATIVE and CYGWIN builds spell out text (t) versus binary (b)
   mode explicitly; all other builds use the plain mode letters. */
70 #if defined(WIN32_NATIVE) || defined(CYGWIN)
71 #define READ_TEXT "rt"
72 #define READ_BINARY "rb"
73 #define WRITE_BINARY "wb"
74 #define APPEND_BINARY "ab"
75 #else /* neither WIN32_NATIVE nor CYGWIN */
77 #define READ_BINARY "r"
78 #define WRITE_BINARY "w"
79 #define APPEND_BINARY "a"
80 #endif /* neither WIN32_NATIVE nor CYGWIN */
82 /* Stdio stream for output to the DOC file. */
/* Forward declarations of the file-local (static) scanning helpers that
   are defined further down in this file. */
92 static int scan_file (const char *filename);
93 static int read_c_string (FILE *, int, int);
94 static void write_c_args (FILE *out, const char *func, char *buf, int minargs,
96 static int scan_c_file (const char *filename, const char *mode);
97 static void skip_white (FILE *);
98 static void read_lisp_symbol (FILE *, char *);
99 static int scan_lisp_file (const char *filename, const char *mode);
/* Nonzero when C is one of the characters allowed in a C identifier; the
   alternatives tested below are the ASCII ranges A-Z, a-z and 0-9.
   The argument is expanded several times and is not parenthesized, so
   pass only a simple expression without side effects. */
101 #define C_IDENTIFIER_CHAR_P(c) \
102 (('A' <= c && c <= 'Z') || \
103 ('a' <= c && c <= 'z') || \
104 ('0' <= c && c <= '9') || \
107 /* Name this program was invoked with. */
110 /* Set to 1 if this was invoked by ellcc */
113 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
/* The message is written to stderr as the program name, a colon and a
   space, the formatted text, and a newline.  S1 is handed to fprintf as
   the format itself, so it must be a trusted string that consumes at most
   the single string argument S2. */
116 error (const char *s1, const char *s2)
118 fprintf (stderr, "%s: ", progname);
119 fprintf (stderr, s1, s2);
120 fprintf (stderr, "\n");
123 /* Print error message and exit. */
/* Takes the same (S1, S2) pair as error; presumably S1 is the control
   string and S2 its argument -- confirm against the function body. */
126 fatal (const char *s1, const char *s2)
132 /* Like malloc but get fatal error if memory is exhausted. */
/* SIZE is the number of bytes to request; it is an unsigned int, so a
   request cannot exceed the range of that type.  The out-of-memory path
   reports `virtual memory exhausted' through fatal. */
135 xmalloc (unsigned int size)
137 long *result = (long *) malloc (size);
139 fatal ("virtual memory exhausted", 0);
/* Fetch the next entry from the site-package list file named by
   EXTRA_ELCS.  The caller keeps calling until NULL comes back.

   The stream and the line buffer are function-local statics, so state
   persists from one call to the next; the entry handed back presumably
   points into that static buffer and is only valid until the next call
   -- confirm.

   The working pointer starts one character into the line buffer and the
   last two characters of the line are cut off; presumably each entry is
   wrapped in one leading and one trailing delimiter character followed by
   a newline -- confirm against the list file format. */
144 next_extra_elc(char *extra_elcs)
146 static FILE *fp = NULL;
147 static char line_buf[BUFSIZ];
148 char *p = line_buf+1;
153 } else if (!(fp = fopen(extra_elcs, READ_BINARY))) {
154 /* It is not an error if this file doesn't exist. */
155 /*fatal("error opening site package file list", 0);*/
/* NOTE(review): the result of this fgets is not checked; it looks like
   the first line of the list is read and thrown away -- confirm. */
158 fgets(line_buf, BUFSIZ, fp);
162 if (!fgets(line_buf, BUFSIZ, fp)) {
168 if (strlen(p) <= 2 || strlen(p) >= (BUFSIZ - 5)) {
169 /* reject too short or too long lines */
/* Drop the final two characters of the entry. */
172 p[strlen(p) - 2] = '\0';
/* Program entry point.

   Leading options recognized, each followed by one argument:
     -o FILE   open FILE as the output file, starting from nothing
     -a FILE   open FILE as the output file, appending
     -E FILE   open FILE as the output file, appending (presumably also
               selects the ellcc output style -- confirm)
     -d ARG    handling not documented here (presumably a directory to
               change to -- confirm)
     -i FILE   remember FILE as the list of extra files to scan
   It is a fatal error if no output file ends up being set.

   Every remaining argument is handed to scan_file, except that a name
   which already occurred earlier among the input arguments is not
   processed a second time.  After that, each entry obtained from the -i
   list is handed to scan_file as well.

   The exit status is 1 when the summed scan_file results are positive
   and 0 otherwise. */
180 main (int argc, char **argv)
185 char *extra_elcs = NULL;
191 /* Don't put CRs in the DOC file. */
194 _setmode (fileno (stdout), O_BINARY);
195 #endif /* WIN32_NATIVE */
197 /* If first two args are -o FILE, output to FILE. */
199 if (argc > i + 1 && !strcmp (argv[i], "-o"))
201 outfile = fopen (argv[i + 1], WRITE_BINARY);
204 if (argc > i + 1 && !strcmp (argv[i], "-a"))
206 outfile = fopen (argv[i + 1], APPEND_BINARY);
209 if (argc > i + 1 && !strcmp (argv[i], "-E"))
211 outfile = fopen (argv[i + 1], APPEND_BINARY);
215 if (argc > i + 1 && !strcmp (argv[i], "-d"))
221 if (argc > (i + 1) && !strcmp(argv[i], "-i")) {
222 extra_elcs = argv[i + 1];
227 fatal ("No output file specified", "");
230 fprintf (outfile, "{\n");
233 for (; i < argc; i++)
236 /* Don't process one file twice. */
237 for (j = first_infile; j < i; j++)
238 if (! strcmp (argv[i], argv[j]))
241 /* err_count seems to be {mis,un}used */
242 err_count += scan_file (argv[i]);
/* Scan the additional files named in the -i list, one entry per call. */
248 while ((p = next_extra_elc(extra_elcs)) != NULL) {
249 err_count += scan_file(p);
253 putc ('\n', outfile);
255 fprintf (outfile, "}\n\n");
257 exit (err_count > 0);
259 return err_count > 0;
262 /* Read file FILENAME and output its doc strings to outfile. */
263 /* Return 1 if file is not found, 0 if it is found. */
/* The scanner is chosen from the file name suffix: a name ending in .elc
   goes to scan_lisp_file in binary read mode, a name ending in .el goes
   to scan_lisp_file in text read mode, and everything else goes to
   scan_c_file in text read mode.  The suffix must be preceded by at least
   one character.  When ellcc is nonzero the two Lisp cases are skipped,
   so every file is scanned as C.  Current_file_type records the choice
   before the scanner runs.

   NOTE(review): both scanners carry comments saying a file that cannot
   be opened is not an error, so the `return 1 if file is not found'
   statement above may be stale -- confirm. */
266 scan_file (const char *filename)
268 int len = strlen (filename);
269 if (ellcc == 0 && len > 4 && !strcmp (filename + len - 4, ".elc"))
271 Current_file_type = elc_file;
272 return scan_lisp_file (filename, READ_BINARY);
274 else if (ellcc == 0 && len > 3 && !strcmp (filename + len - 3, ".el"))
276 Current_file_type = el_file;
277 return scan_lisp_file (filename, READ_TEXT);
281 Current_file_type = c_file;
282 return scan_c_file (filename, READ_TEXT);
288 /* Skip a C string from INFILE,
289 and return the character that follows the closing ".
290 If printflag is positive, output string contents to outfile.
291 If it is negative, store contents in buf.
292 Convert escape sequences \n and \t to newline and tab;
293 discard \ followed by newline. */
/* A PRINTFLAG of zero is what callers pass when the string is only to be
   skipped.  When C_DOCSTRING is nonzero the main loop is not ended by a
   double-quote character; it still ends at EOF.

   MDGET advances the input by one character: the current character is
   saved in prevc and the next one is read from infile into c.  It expands
   in place and therefore relies on locals with exactly those names. */
295 #define MDGET do { prevc = c; c = getc (infile); } while (0)
297 read_c_string (FILE *infile, int printflag, int c_docstring)
299 register int prevc = 0, c = 0;
306 while ((c_docstring || c != '"') && c != EOF)
310 int cc = getc (infile);
318 fprintf (outfile, "\\n\\");
319 putc ('\n', outfile);
321 else if (printflag < 0)
335 fprintf (outfile, "\\n\\");
336 putc ('\n', outfile);
338 else if (printflag < 0)
351 if (!c_docstring && c == 'n')
363 if (ellcc && c == '"')
364 putc ('\\', outfile);
367 else if (printflag < 0)
372 /* look for continuation of string */
373 if (Current_file_type == c_file)
388 /* If we had a "", concatenate the two strings. */
399 /* Write to file OUT the argument names of function FUNC, whose text is in BUF.
400 MINARGS and MAXARGS are the minimum and maximum number of arguments. */
/* The argument text is walked one character at a time and rewritten the
   way the list would look in Lisp: the Lisp_Object type name in front of
   each argument is skipped, underscores become hyphens, commas become
   spaces, runs of spaces collapse to one, and lowercase letters are
   upcased.  An `&optional ' marker is written when MINARGS is 0 and
   MAXARGS is positive at the start of an identifier.  A newline is
   written at the end. */
403 write_c_args (FILE *out, const char *func, char *buff, int minargs,
412 fprintf (out, "(%s", func);
414 /* XEmacs - "arguments:" is for parsing the docstring. FSF's help system
415 doesn't parse the docstring for arguments like we do, so we're also
416 going to omit the function name to preserve compatibility with elisp
417 that parses the docstring. Finally, not prefixing the arglist with
418 anything is asking for trouble because it's not uncommon to have an
419 unescaped parenthesis at the beginning of a line. --Stig */
420 fprintf (out, "arguments: (");
426 for (p = buff; *p; p++)
431 /* Add support for ANSI prototypes. Hop over
432 "Lisp_Object" string (the only C type allowed in DEFUNs) */
433 static char lo[] = "Lisp_Object";
434 if ((C_IDENTIFIER_CHAR_P (c) != in_ident) && !in_ident &&
435 (strncmp (p, lo, sizeof (lo) - 1) == 0) &&
436 isspace((unsigned char) (* (p + sizeof (lo) - 1))))
438 p += (sizeof (lo) - 1);
439 while (isspace ((unsigned char) (*p)))
444 /* Notice when we start printing a new identifier. */
445 if (C_IDENTIFIER_CHAR_P (c) != in_ident)
452 /* XEmacs - This goes along with the change above. */
456 if (minargs == 0 && maxargs > 0)
457 fprintf (out, "&optional ");
467 /* Print the C argument list as it would appear in lisp:
468 print underscores as hyphens, and print commas as spaces.
469 Collapse adjacent spaces into one. */
470 if (c == '_') c = '-';
471 if (c == ',') c = ' ';
473 /* If the C argument name ends with `_', change it to ' ',
474 to allow use of C reserved words or global symbols as Lisp args. */
475 if (c == '-' && ! C_IDENTIFIER_CHAR_P (p[1]))
480 else if (c != ' ' || ! just_spaced)
482 if (c >= 'a' && c <= 'z')
483 /* Upcase the letter. */
488 just_spaced = (c == ' ');
494 putc ('\n', out); /* XEmacs addition */
497 /* Read through a c file. If a .o file is named,
498 the corresponding .c file is read instead.
499 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
500 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
/* FILENAME names the file to scan and MODE is the fopen mode string used
   to open it.  The name is copied into a local buffer before opening;
   errno is set to ENAMETOOLONG on the path that rejects the name
   (presumably because it does not fit that buffer -- confirm).

   For each construct found, the entry header is written to outfile: a
   CDOC macro call with SYM or SUBR, and a V or F tag followed by the
   name (presumably the ellcc and plain DOC styles respectively --
   confirm).  The doc string is then copied with read_c_string, and for
   a DEFUN whose maxargs is not -1 the argument list is appended with
   write_c_args.

   NOTE(review): minargs and maxargs are declared without initializers
   and the fscanf calls that fill them are unchecked, so a malformed
   DEFUN could leave them indeterminate -- confirm.
   NOTE(review): the argument list is copied into the 1024-byte argbuf
   and no bound check on that copy is apparent -- confirm. */
503 scan_c_file (const char *filename, const char *mode)
508 register int defunflag;
509 register int defvarperbufferflag = 0;
510 register int defvarflag;
511 int minargs, maxargs;
512 int l = strlen (filename);
518 errno = ENAMETOOLONG;
525 strcpy (f, filename);
528 infile = fopen (f, mode);
530 /* No error if the input file does not exist */
538 while (!feof (infile))
575 /* Note that this business doesn't apply under XEmacs.
576 DEFVAR_BUFFER_LOCAL in XEmacs behaves normally. */
577 defvarperbufferflag = (c == 'P');
590 defunflag = (c == 'U');
606 c = read_c_string (infile, -1, 0);
610 else if (defvarperbufferflag)
614 else /* For DEFSIMPLE and DEFPRED */
622 if (defunflag && (commas == 1 || commas == 2))
626 while (c == ' ' || c == '\n' || c == '\t')
631 if (commas == 2) /* pick up minargs */
632 fscanf (infile, "%d", &minargs);
633 else /* pick up maxargs */
634 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
637 fscanf (infile, "%d", &maxargs);
644 while (c == ' ' || c == '\n' || c == '\t')
647 c = read_c_string (infile, 0, 0);
648 if (defunflag | defvarflag)
662 while (c == ' ' || c == '\n' || c == '\t')
664 if (defunflag | defvarflag)
667 if (defunflag || defvarflag || c == '"')
670 fprintf (outfile, " CDOC%s(\"%s\", \"\\\n",
671 defvarflag ? "SYM" : "SUBR", buf);
675 putc (defvarflag ? 'V' : 'F', outfile);
676 fprintf (outfile, "%s\n", buf);
678 c = read_c_string (infile, 1, (defunflag || defvarflag));
680 /* If this is a defun, find the arguments and print them. If
681 this function takes MANY or UNEVALLED args, then the C source
682 won't give the names of the arguments, so we shouldn't bother
683 trying to find them. */
684 if (defunflag && maxargs != -1)
686 char argbuf[1024], *p = argbuf;
687 #if 0 /* For old DEFUN's only */
695 /* Skip into arguments. */
702 /* Copy arguments into ARGBUF. */
705 *p++ = c = getc (infile);
710 fprintf (outfile, "\\n\\\n\\n\\\n");
712 fprintf (outfile, "\n\n");
713 write_c_args (outfile, buf, argbuf, minargs, maxargs);
716 fprintf (outfile, "\\n\");\n\n");
724 /* Read a file of Lisp code, compiled or interpreted.  Looks for
726 (defun NAME ARGS DOCSTRING ...)
727 (defmacro NAME ARGS DOCSTRING ...)
728 (autoload (quote NAME) FILE DOCSTRING ...)
729 (defvar NAME VALUE DOCSTRING)
730 (defconst NAME VALUE DOCSTRING)
731 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
732 (fset (quote NAME) #[... DOCSTRING ...])
733 (defalias (quote NAME) #[... DOCSTRING ...])
734 starting in column zero.
735 (quote NAME) may appear as 'NAME as well.
737 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
738 When we find that, we save it for the following defining-form,
739 and we use that instead of reading a doc string within that defining-form.
741 For defun, defmacro, and autoload, we know how to skip over the arglist.
742 For defvar, defconst, and fset we skip to the docstring with a kludgy
743 formatting convention: all docstrings must appear on the same line as the
744 initial open-paren (the one in column zero) and must contain a backslash
745 and a newline immediately after the initial double-quote. No newlines
746 may appear between the beginning of the form and the first double-quote.
747 The only source file that must follow this convention is loaddefs.el; aside
748 from that, it is always the .elc file that we look at, and they are no
749 problem because byte-compiler output follows this convention.
750 The NAME and DOCSTRING are output.
751 NAME is preceded by `F' for a function or `V' for a variable.
752 An entry is output only if DOCSTRING has \ newline just after the opening "
/* Skip over spaces, tabs and newlines on INFILE.  Callers read the first
   non-blank character with getc right after this returns, so that
   character is evidently left unread on the stream. */
756 skip_white (FILE *infile)
759 while (c == ' ' || c == '\t' || c == '\n')
/* Read one Lisp symbol from INFILE into BUFFER.  A space, tab, newline or
   parenthesis ends the symbol.  The `## expected a symbol' diagnostic is
   printed on stderr, presumably when no symbol characters were read --
   confirm.
   NOTE(review): no limit on the number of characters stored into BUFFER
   is apparent; callers must supply a buffer large enough for the longest
   symbol -- confirm. */
765 read_lisp_symbol (FILE *infile, char *buffer)
768 char *fillp = buffer;
775 /* FSF has *(++fillp), which is wrong. */
776 *fillp++ = getc (infile);
777 else if (c == ' ' || c == '\t' || c == '\n' || c == '(' || c == ')')
788 fprintf (stderr, "## expected a symbol, got '%c'\n", c);
/* Scan the Lisp file FILENAME, opened with fopen mode MODE, and write the
   doc strings of the recognized top-level forms to outfile.

   Forms handled: defun, defmacro, defvar, defconst, fset, defalias and
   autoload.  Other forms draw an `unrecognized top-level form' diagnostic
   on stderr, and so do forms whose name, argument list or doc string
   cannot be parsed.

   A dynamic doc string read ahead of a form is kept in saved_string and
   written in place of a doc string inside the form; it is freed after
   use so that it is never written twice.

   Each entry written consists of a newline, the ^_ character (octal 037),
   the type character, the symbol name and a newline, followed by the doc
   string text.

   The early `return 0' after fopen means a file that cannot be opened is
   presumably not counted as an error -- confirm. */
794 scan_lisp_file (const char *filename, const char *mode)
798 char *saved_string = 0;
800 infile = fopen (filename, mode);
804 return 0; /* No error */
808 while (!feof (infile))
819 /* Detect a dynamic doc string and save it for the next expression. */
828 /* Read the length. */
829 while ((c = getc (infile),
830 c >= '0' && c <= '9'))
836 /* The next character is a space that is counted in the length
837 but not part of the doc string.
838 We already read it, so just ignore it. */
841 /* Read in the contents. */
842 if (saved_string != 0)
844 saved_string = (char *) xmalloc (length);
845 for (i = 0; i < length; i++)
846 saved_string[i] = getc (infile);
847 /* The last character is a ^_.
848 That is needed in the .elc file
849 but it is redundant in DOC. So get rid of it here. */
/* NOTE(review): a LENGTH of zero would make the index below negative --
   confirm the input always carries a positive length. */
850 saved_string[length - 1] = 0;
851 /* Skip the newline. */
862 read_lisp_symbol (infile, buffer);
864 if (! strcmp (buffer, "defun") ||
865 ! strcmp (buffer, "defmacro"))
868 read_lisp_symbol (infile, buffer);
870 /* Skip the arguments: either "nil" or a list in parens */
873 if (c == 'n') /* nil */
875 if ((c = getc (infile)) != 'i' ||
876 (c = getc (infile)) != 'l')
878 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
885 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
894 /* If the next three characters aren't `dquote bslash newline'
895 then we're not reading a docstring.
897 if ((c = getc (infile)) != '"' ||
898 (c = getc (infile)) != '\\' ||
899 (c = getc (infile)) != '\n')
902 fprintf (stderr, "## non-docstring in %s (%s)\n",
909 else if (! strcmp (buffer, "defvar") ||
910 ! strcmp (buffer, "defconst"))
914 read_lisp_symbol (infile, buffer);
916 if (saved_string == 0)
919 /* Skip until the first newline; remember the two previous chars. */
920 while (c != '\n' && c >= 0)
922 /* #### Kludge -- Ignore any ESC x x ISO2022 sequences */
936 /* If two previous characters were " and \,
937 this is a doc string. Otherwise, there is none. */
938 if (c2 != '"' || c1 != '\\')
941 fprintf (stderr, "## non-docstring in %s (%s)\n",
949 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
956 read_lisp_symbol (infile, buffer);
961 fprintf (stderr, "## unparsable name in fset in %s\n",
965 read_lisp_symbol (infile, buffer);
966 if (strcmp (buffer, "quote"))
968 fprintf (stderr, "## unparsable name in fset in %s\n",
972 read_lisp_symbol (infile, buffer);
977 "## unparsable quoted name in fset in %s\n",
983 if (saved_string == 0)
985 /* Skip until the first newline; remember the two previous chars. */
986 while (c != '\n' && c >= 0)
993 /* If two previous characters were " and \,
994 this is a doc string. Otherwise, there is none. */
995 if (c2 != '"' || c1 != '\\')
998 fprintf (stderr, "## non-docstring in %s (%s)\n",
1006 else if (! strcmp (buffer, "autoload"))
1011 read_lisp_symbol (infile, buffer);
1016 fprintf (stderr, "## unparsable name in autoload in %s\n",
1020 read_lisp_symbol (infile, buffer);
1021 if (strcmp (buffer, "quote"))
1023 fprintf (stderr, "## unparsable name in autoload in %s\n",
1027 read_lisp_symbol (infile, buffer);
1032 "## unparsable quoted name in autoload in %s\n",
1037 skip_white (infile);
1038 if ((c = getc (infile)) != '\"')
1040 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1044 read_c_string (infile, 0, 0);
1045 skip_white (infile);
1047 if (saved_string == 0)
1049 /* If the next three characters aren't `dquote bslash newline'
1050 then we're not reading a docstring. */
1051 if ((c = getc (infile)) != '"' ||
1052 (c = getc (infile)) != '\\' ||
1053 (c = getc (infile)) != '\n')
1056 fprintf (stderr, "## non-docstring in %s (%s)\n",
1064 #if 0 /* causes crash */
1065 else if (! strcmp (buffer, "if") ||
1066 ! strcmp (buffer, "byte-code"))
1073 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1079 /* At this point, we should either use the previous
1080 dynamic doc string in saved_string
1081 or gobble a doc string from the input file.
1083 In the latter case, the opening quote (and leading
1084 backslash-newline) have already been read. */
1085 putc ('\n', outfile); /* XEmacs addition */
1086 putc (037, outfile);
1087 putc (type, outfile);
1088 fprintf (outfile, "%s\n", buffer);
1091 fputs (saved_string, outfile);
1092 /* Don't use one dynamic doc string twice. */
1093 free (saved_string);
1097 read_c_string (infile, 1, 0);