1 /* Generate doc-string file for XEmacs from source files.
2 Copyright (C) 1985, 1986, 1992, 1993, 1994 Free Software Foundation, Inc.
3 Copyright (C) 1995 Board of Trustees, University of Illinois
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 19.30. */
24 /* The arguments given to this program are all the C and Lisp source files
25 of XEmacs. .elc and .el and .c files are allowed.
26 A .o file can also be specified; the .c file it was made from is used.
27 This helps the makefile pass the correct list of files.
29 The results, which go to standard output or to a file
30 specified with -a or -o (-a to append, -o to start from nothing),
31 are entries containing function or variable names and their documentation.
32 Each entry starts with a ^_ character.
33 Then comes F for a function or V for a variable.
34 Then comes the function or variable name, terminated with a newline.
35 Then comes the documentation for that function or variable.
37 Added 19.15/20.1: `-i site-packages' allows the installer to dump extra packages
38 without modifying Makefiles, etc. */
41 #define NO_SHORTNAMES /* Tell config not to load remap.h */
42 #include <../src/config.h>
46 #if __STDC__ || defined(STDC_HEADERS)
53 #include <sys/param.h>
55 #if defined(MSDOS) || defined(__CYGWIN32__)
63 #endif /* WINDOWSNT */
65 #if defined(DOS_NT) || defined(__CYGWIN32__)
66 #define READ_TEXT "rt"
67 #define READ_BINARY "rb"
68 #define WRITE_BINARY "wb"
69 #define APPEND_BINARY "ab"
70 #else /* not DOS_NT */
72 #define READ_BINARY "r"
73 #define WRITE_BINARY "w"
74 #define APPEND_BINARY "a"
75 #endif /* not DOS_NT */
78 /* s/msdos.h defines this as sys_chdir, but we're not linking with the
79 file where that function is defined. */
83 /* Stdio stream for output to the DOC file. */
93 static int scan_file (CONST char *filename);
94 static int read_c_string (FILE *, int, int);
95 static void write_c_args (FILE *out, CONST char *func, char *buf, int minargs,
97 static int scan_c_file (CONST char *filename, CONST char *mode);
98 static void skip_white (FILE *);
99 static void read_lisp_symbol (FILE *, char *);
100 static int scan_lisp_file (CONST char *filename, CONST char *mode);
102 #define C_IDENTIFIER_CHAR_P(c) \
103 (('A' <= c && c <= 'Z') || \
104 ('a' <= c && c <= 'z') || \
105 ('0' <= c && c <= '9') || \
108 /* Name this program was invoked with. */
111 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
114 error (CONST char *s1, CONST char *s2)
116 fprintf (stderr, "%s: ", progname);
117 fprintf (stderr, s1, s2);
118 fprintf (stderr, "\n");
121 /* Print error message and exit. */
124 fatal (CONST char *s1, CONST char *s2)
130 /* Like malloc but get fatal error if memory is exhausted. */
133 xmalloc (unsigned int size)
135 long *result = (long *) malloc (size);
137 fatal ("virtual memory exhausted", 0);
142 next_extra_elc(char *extra_elcs)
144 static FILE *fp = NULL;
145 static char line_buf[BUFSIZ];
146 char *p = line_buf+1;
151 } else if (!(fp = fopen(extra_elcs, READ_BINARY))) {
152 /* It is not an error if this file doesn't exist. */
153 /*fatal("error opening site package file list", 0);*/
156 fgets(line_buf, BUFSIZ, fp);
160 if (!fgets(line_buf, BUFSIZ, fp)) {
166 if (strlen(p) <= 2 || strlen(p) >= (BUFSIZ - 5)) {
167 /* reject too short or too long lines */
170 p[strlen(p) - 2] = '\0';
178 main (int argc, char **argv)
183 char *extra_elcs = NULL;
189 /* Don't put CRs in the DOC file. */
192 #if 0 /* Suspicion is that this causes hanging.
193 So instead we require people to use -o on MSDOS. */
194 (stdout)->_flag &= ~_IOTEXT;
195 _setmode (fileno (stdout), O_BINARY);
201 _setmode (fileno (stdout), O_BINARY);
202 #endif /* WINDOWSNT */
204 /* If first two args are -o FILE, output to FILE. */
206 if (argc > i + 1 && !strcmp (argv[i], "-o"))
208 outfile = fopen (argv[i + 1], WRITE_BINARY);
211 if (argc > i + 1 && !strcmp (argv[i], "-a"))
213 outfile = fopen (argv[i + 1], APPEND_BINARY);
216 if (argc > i + 1 && !strcmp (argv[i], "-d"))
222 if (argc > (i + 1) && !strcmp(argv[i], "-i")) {
223 extra_elcs = argv[i + 1];
228 fatal ("No output file specified", "");
231 for (; i < argc; i++)
234 /* Don't process one file twice. */
235 for (j = first_infile; j < i; j++)
236 if (! strcmp (argv[i], argv[j]))
239 /* err_count seems to be {mis,un}used */
240 err_count += scan_file (argv[i]);
246 while ((p = next_extra_elc(extra_elcs)) != NULL) {
247 err_count += scan_file(p);
251 putc ('\n', outfile);
253 exit (err_count > 0);
255 return err_count > 0;
258 /* Read file FILENAME and output its doc strings to outfile. */
259 /* Return 1 if file is not found, 0 if it is found. */
262 scan_file (CONST char *filename)
264 int len = strlen (filename);
265 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
267 Current_file_type = elc_file;
268 return scan_lisp_file (filename, READ_BINARY);
270 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
272 Current_file_type = el_file;
273 return scan_lisp_file (filename, READ_TEXT);
277 Current_file_type = c_file;
278 return scan_c_file (filename, READ_TEXT);
284 /* Skip a C string from INFILE,
285 and return the character that follows the closing ".
286 If printflag is positive, output string contents to outfile.
287 If it is negative, store contents in buf.
288 Convert escape sequences \n and \t to newline and tab;
289 discard \ followed by newline. */
292 read_c_string (FILE *infile, int printflag, int c_docstring)
301 while ((c_docstring || c != '"') && c != EOF)
307 int cc = getc (infile);
317 putc ('\n', outfile);
318 else if (printflag < 0)
332 if (!c_docstring && c == 'n')
344 else if (printflag < 0)
349 /* look for continuation of string */
350 if (Current_file_type == c_file)
352 while (isspace (c = getc (infile)))
362 /* If we had a "", concatenate the two strings. */
373 /* Write to file OUT the argument names of function FUNC, whose text is in BUF.
374 MINARGS and MAXARGS are the minimum and maximum number of arguments. */
377 write_c_args (FILE *out, CONST char *func, char *buff, int minargs,
386 fprintf (out, "(%s", func);
388 /* XEmacs - "arguments:" is for parsing the docstring. FSF's help system
389 doesn't parse the docstring for arguments like we do, so we're also
390 going to omit the function name to preserve compatibility with elisp
391 that parses the docstring. Finally, not prefixing the arglist with
392 anything is asking for trouble because it's not uncommon to have an
393 unescaped parenthesis at the beginning of a line. --Stig */
394 fprintf (out, "arguments: (");
400 for (p = buff; *p; p++)
405 /* Add support for ANSI prototypes. Hop over
406 "Lisp_Object" string (the only C type allowed in DEFUNs) */
407 static char lo[] = "Lisp_Object";
408 if ((C_IDENTIFIER_CHAR_P (c) != in_ident) && !in_ident &&
409 (strncmp (p, lo, sizeof (lo) - 1) == 0) &&
410 isspace(*(p + sizeof (lo) - 1)))
412 p += (sizeof (lo) - 1);
418 /* Notice when we start printing a new identifier. */
419 if (C_IDENTIFIER_CHAR_P (c) != in_ident)
426 /* XEmacs - This goes along with the change above. */
430 if (minargs == 0 && maxargs > 0)
431 fprintf (out, "&optional ");
441 /* Print the C argument list as it would appear in lisp:
442 print underscores as hyphens, and print commas as spaces.
443 Collapse adjacent spaces into one. */
444 if (c == '_') c = '-';
445 if (c == ',') c = ' ';
447 /* If the C argument name ends with `_', change it to ' ',
448 to allow use of C reserved words or global symbols as Lisp args. */
449 if (c == '-' && ! C_IDENTIFIER_CHAR_P (p[1]))
454 else if (c != ' ' || ! just_spaced)
456 if (c >= 'a' && c <= 'z')
457 /* Upcase the letter. */
462 just_spaced = (c == ' ');
467 putc ('\n', out); /* XEmacs addition */
470 /* Read through a c file. If a .o file is named,
471 the corresponding .c file is read instead.
472 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
473 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
476 scan_c_file (CONST char *filename, CONST char *mode)
481 register int defunflag;
482 register int defvarperbufferflag = 0;
483 register int defvarflag;
484 int minargs, maxargs;
485 int l = strlen (filename);
491 errno = ENAMETOOLONG;
498 strcpy (f, filename);
501 infile = fopen (f, mode);
503 /* No error if the input file does not exist */
511 while (!feof (infile))
548 /* Note that this business doesn't apply under XEmacs.
549 DEFVAR_BUFFER_LOCAL in XEmacs behaves normally. */
550 defvarperbufferflag = (c == 'P');
563 defunflag = (c == 'U');
579 c = read_c_string (infile, -1, 0);
583 else if (defvarperbufferflag)
587 else /* For DEFSIMPLE and DEFPRED */
595 if (defunflag && (commas == 1 || commas == 2))
599 while (c == ' ' || c == '\n' || c == '\t')
604 if (commas == 2) /* pick up minargs */
605 fscanf (infile, "%d", &minargs);
606 else /* pick up maxargs */
607 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
610 fscanf (infile, "%d", &maxargs);
617 while (c == ' ' || c == '\n' || c == '\t')
620 c = read_c_string (infile, 0, 0);
621 if (defunflag | defvarflag)
635 while (c == ' ' || c == '\n' || c == '\t')
637 if (defunflag | defvarflag)
640 if (defunflag || defvarflag || c == '"')
643 putc (defvarflag ? 'V' : 'F', outfile);
644 fprintf (outfile, "%s\n", buf);
645 c = read_c_string (infile, 1, (defunflag || defvarflag));
647 /* If this is a defun, find the arguments and print them. If
648 this function takes MANY or UNEVALLED args, then the C source
649 won't give the names of the arguments, so we shouldn't bother
650 trying to find them. */
651 if (defunflag && maxargs != -1)
653 char argbuf[1024], *p = argbuf;
654 #if 0 /* For old DEFUN's only */
662 /* Skip into arguments. */
669 /* Copy arguments into ARGBUF. */
672 *p++ = c = getc (infile);
676 fprintf (outfile, "\n\n");
677 write_c_args (outfile, buf, argbuf, minargs, maxargs);
686 /* Read a file of Lisp code, compiled or interpreted.
688 (defun NAME ARGS DOCSTRING ...)
689 (defmacro NAME ARGS DOCSTRING ...)
690 (autoload (quote NAME) FILE DOCSTRING ...)
691 (defvar NAME VALUE DOCSTRING)
692 (defconst NAME VALUE DOCSTRING)
693 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
694 (fset (quote NAME) #[... DOCSTRING ...])
695 (defalias (quote NAME) #[... DOCSTRING ...])
696 starting in column zero.
697 (quote NAME) may appear as 'NAME as well.
699 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
700 When we find that, we save it for the following defining-form,
701 and we use that instead of reading a doc string within that defining-form.
703 For defun, defmacro, and autoload, we know how to skip over the arglist.
704 For defvar, defconst, and fset we skip to the docstring with a kludgy
705 formatting convention: all docstrings must appear on the same line as the
706 initial open-paren (the one in column zero) and must contain a backslash
707 and a newline immediately after the initial double-quote. No newlines
708 must appear between the beginning of the form and the first double-quote.
709 The only source file that must follow this convention is loaddefs.el; aside
710 from that, it is always the .elc file that we look at, and they are no
711 problem because byte-compiler output follows this convention.
712 The NAME and DOCSTRING are output.
713 NAME is preceded by `F' for a function or `V' for a variable.
714 An entry is output only if DOCSTRING has \ newline just after the opening " */
718 skip_white (FILE *infile)
721 while (c == ' ' || c == '\t' || c == '\n')
727 read_lisp_symbol (FILE *infile, char *buffer)
730 char *fillp = buffer;
737 /* FSF has *(++fillp), which is wrong. */
738 *fillp++ = getc (infile);
739 else if (c == ' ' || c == '\t' || c == '\n' || c == '(' || c == ')')
750 fprintf (stderr, "## expected a symbol, got '%c'\n", c);
756 scan_lisp_file (CONST char *filename, CONST char *mode)
760 char *saved_string = 0;
762 infile = fopen (filename, mode);
766 return 0; /* No error */
770 while (!feof (infile))
781 /* Detect a dynamic doc string and save it for the next expression. */
790 /* Read the length. */
791 while ((c = getc (infile),
792 c >= '0' && c <= '9'))
798 /* The next character is a space that is counted in the length
799 but not part of the doc string.
800 We already read it, so just ignore it. */
803 /* Read in the contents. */
804 if (saved_string != 0)
806 saved_string = (char *) xmalloc (length);
807 for (i = 0; i < length; i++)
808 saved_string[i] = getc (infile);
809 /* The last character is a ^_.
810 That is needed in the .elc file
811 but it is redundant in DOC. So get rid of it here. */
812 saved_string[length - 1] = 0;
813 /* Skip the newline. */
824 read_lisp_symbol (infile, buffer);
826 if (! strcmp (buffer, "defun") ||
827 ! strcmp (buffer, "defmacro"))
830 read_lisp_symbol (infile, buffer);
832 /* Skip the arguments: either "nil" or a list in parens */
835 if (c == 'n') /* nil */
837 if ((c = getc (infile)) != 'i' ||
838 (c = getc (infile)) != 'l')
840 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
847 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
856 /* If the next three characters aren't `dquote bslash newline'
857 then we're not reading a docstring. */
859 if ((c = getc (infile)) != '"' ||
860 (c = getc (infile)) != '\\' ||
861 (c = getc (infile)) != '\n')
864 fprintf (stderr, "## non-docstring in %s (%s)\n",
871 else if (! strcmp (buffer, "defvar") ||
872 ! strcmp (buffer, "defconst"))
876 read_lisp_symbol (infile, buffer);
878 if (saved_string == 0)
881 /* Skip until the first newline; remember the two previous chars. */
882 while (c != '\n' && c >= 0)
884 /* ### Kludge -- Ignore any ESC x x ISO2022 sequences */
898 /* If two previous characters were " and \,
899 this is a doc string. Otherwise, there is none. */
900 if (c2 != '"' || c1 != '\\')
903 fprintf (stderr, "## non-docstring in %s (%s)\n",
911 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
918 read_lisp_symbol (infile, buffer);
923 fprintf (stderr, "## unparsable name in fset in %s\n",
927 read_lisp_symbol (infile, buffer);
928 if (strcmp (buffer, "quote"))
930 fprintf (stderr, "## unparsable name in fset in %s\n",
934 read_lisp_symbol (infile, buffer);
939 "## unparsable quoted name in fset in %s\n",
945 if (saved_string == 0)
947 /* Skip until the first newline; remember the two previous chars. */
948 while (c != '\n' && c >= 0)
955 /* If two previous characters were " and \,
956 this is a doc string. Otherwise, there is none. */
957 if (c2 != '"' || c1 != '\\')
960 fprintf (stderr, "## non-docstring in %s (%s)\n",
968 else if (! strcmp (buffer, "autoload"))
973 read_lisp_symbol (infile, buffer);
978 fprintf (stderr, "## unparsable name in autoload in %s\n",
982 read_lisp_symbol (infile, buffer);
983 if (strcmp (buffer, "quote"))
985 fprintf (stderr, "## unparsable name in autoload in %s\n",
989 read_lisp_symbol (infile, buffer);
994 "## unparsable quoted name in autoload in %s\n",
1000 if ((c = getc (infile)) != '\"')
1002 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1006 read_c_string (infile, 0, 0);
1007 skip_white (infile);
1009 if (saved_string == 0)
1011 /* If the next three characters aren't `dquote bslash newline'
1012 then we're not reading a docstring. */
1013 if ((c = getc (infile)) != '"' ||
1014 (c = getc (infile)) != '\\' ||
1015 (c = getc (infile)) != '\n')
1018 fprintf (stderr, "## non-docstring in %s (%s)\n",
1026 #if 0 /* causes crash */
1027 else if (! strcmp (buffer, "if") ||
1028 ! strcmp (buffer, "byte-code"))
1035 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1041 /* At this point, we should either use the previous
1042 dynamic doc string in saved_string
1043 or gobble a doc string from the input file.
1045 In the latter case, the opening quote (and leading
1046 backslash-newline) have already been read. */
1047 putc ('\n', outfile); /* XEmacs addition */
1048 putc (037, outfile);
1049 putc (type, outfile);
1050 fprintf (outfile, "%s\n", buffer);
1053 fputs (saved_string, outfile);
1054 /* Don't use one dynamic doc string twice. */
1055 free (saved_string);
1059 read_c_string (infile, 1, 0);