Resorted; add some missing Morohashi's Daikanwa characters; add

[chise/xemacs-chise.git-] / info / lispref.info-43
diff --git a/info/lispref.info-43 b/info/lispref.info-43

index d39722d..4b1d70b 100644 (file)
--- a/info/lispref.info-43
+++ b/info/lispref.info-43
@@ -50,6 +50,513 @@ may be included in a translation approved by the Free Software
  Foundation instead of in the original English.
  
  \1f
+File: lispref.info,  Node: Unimplemented libpq Functions,  Prev: Other libpq Functions,  Up: XEmacs PostgreSQL libpq API
+
+Unimplemented libpq Functions
+-----------------------------
+
+ - Unimplemented Function: PGconn *PQsetdbLogin (char *pghost, char
+          *pgport, char *pgoptions, char *pgtty, char *dbName, char
+          *login, char *pwd)
+     Synchronous database connection.  PGHOST is the hostname of the
+     PostgreSQL backend to connect to.  PGPORT is the TCP port number
+     to use.  PGOPTIONS specifies other backend options.  PGTTY
+     specifies the debugging tty to use.  DBNAME specifies the database
+     name to use.  LOGIN specifies the database user name.  PWD
+     specifies the database user's password.
+
+     This routine is deprecated as of libpq-7.0, and its functionality
+     can be replaced by external Lisp code if needed.
+
+ - Unimplemented Function: PGconn *PQsetdb (char *pghost, char *pgport,
+          char *pgoptions, char *pgtty, char *dbName)
+     Synchronous database connection.  PGHOST is the hostname of the
+     PostgreSQL backend to connect to.  PGPORT is the TCP port number
+     to use.  PGOPTIONS specifies other backend options.  PGTTY
+     specifies the debugging tty to use.  DBNAME specifies the database
+     name to use.
+
+     This routine was deprecated in libpq-6.5.
+
+ - Unimplemented Function: int PQsocket (PGconn *conn)
+     Return socket file descriptor to a backend database process.  CONN
+     database connection object.
+
+ - Unimplemented Function: void PQprint (FILE *fout, PGresult *res,
+          PGprintOpt *ps)
+     Print out the results of a query to a designated C stream.  FOUT C
+     stream to print to RES the query result object to print PS the
+     print options structure.
+
+     This routine is deprecated as of libpq-7.0 and cannot be sensibly
+     exported to XEmacs Lisp.
+
+ - Unimplemented Function: void PQdisplayTuples (PGresult *res, FILE
+          *fp, int fillAlign, char *fieldSep, int printHeader, int
+          quiet)
+     RES query result object to print FP C stream to print to FILLALIGN
+     pad the fields with spaces FIELDSEP field separator PRINTHEADER
+     display headers?  QUIET
+
+     This routine was deprecated in libpq-6.5.
+
+ - Unimplemented Function: void PQprintTuples (PGresult *res, FILE
+          *fout, int printAttName, int terseOutput, int width)
+     RES query result object to print FOUT C stream to print to
+     PRINTATTNAME print attribute names TERSEOUTPUT delimiter bars
+     WIDTH width of column, if 0, use variable width
+
+     This routine was deprecated in libpq-6.5.
+
+ - Unimplemented Function: int PQmblen (char *s, int encoding)
+     Determine length of a multibyte encoded char at `*s'.  S encoded
+     string ENCODING type of encoding
+
+     Compatibility note:  This function was introduced in libpq-7.0.
+
+ - Unimplemented Function: void PQtrace (PGconn *conn, FILE *debug_port)
+     Enable tracing on `debug_port'.  CONN database connection object.
+     DEBUG_PORT C output stream to use.
+
+ - Unimplemented Function: void PQuntrace (PGconn *conn)
+     Disable tracing.  CONN database connection object.
+
+ - Unimplemented Function: char *PQoidStatus (PGconn *conn)
+     Return the object id as a string of the last tuple inserted.  CONN
+     database connection object.
+
+     Compatibility note: This function is deprecated in libpq-7.0,
+     however it is used internally by the XEmacs binding code when
+     linked against versions prior to 7.0.
+
+ - Unimplemented Function: PGresult *PQfn (PGconn *conn, int fnid, int
+          *result_buf, int *result_len, int result_is_int, PQArgBlock
+          *args, int nargs)
+     "Fast path" interface -- not really recommended for application use
+     CONN A database connection object.  FNID RESULT_BUF RESULT_LEN
+     RESULT_IS_INT ARGS NARGS
+
+   The following set of very low level large object functions aren't
+appropriate to be exported to Lisp.
+
+ - Unimplemented Function: int pq-lo-open (PGconn *conn, int lobjid,
+          int mode)
+     CONN a database connection object.  LOBJID a large object ID.
+     MODE opening modes.
+
+ - Unimplemented Function: int pq-lo-close (PGconn *conn, int fd)
+     CONN a database connection object.  FD a large object file
+     descriptor
+
+ - Unimplemented Function: int pq-lo-read (PGconn *conn, int fd, char
+          *buf, int len)
+     CONN a database connection object.  FD a large object file
+     descriptor.  BUF buffer to read into.  LEN size of buffer.
+
+ - Unimplemented Function: int pq-lo-write (PGconn *conn, int fd, char
+          *buf, size_t len)
+     CONN a database connection object.  FD a large object file
+     descriptor.  BUF buffer to write from.  LEN size of buffer.
+
+ - Unimplemented Function: int pq-lo-lseek (PGconn *conn, int fd, int
+          offset, int whence)
+     CONN a database connection object.  FD a large object file
+     descriptor.  OFFSET WHENCE
+
+ - Unimplemented Function: int pq-lo-creat (PGconn *conn, int mode)
+     CONN a database connection object.  MODE opening modes.
+
+ - Unimplemented Function: int pq-lo-tell (PGconn *conn, int fd)
+     CONN a database connection object.  FD a large object file
+     descriptor.
+
+ - Unimplemented Function: int pq-lo-unlink (PGconn *conn, int lobjid)
+     CONN a database connection object.  LBOJID a large object ID.
+
+\1f
+File: lispref.info,  Node: XEmacs PostgreSQL libpq Examples,  Prev: XEmacs PostgreSQL libpq API,  Up: PostgreSQL Support
+
+XEmacs PostgreSQL libpq Examples
+================================
+
+   This is an example of one method of establishing an asynchronous
+connection.
+
+     (defun database-poller (P)
+       (message "%S before poll" (pq-pgconn P 'pq::status))
+       (pq-connect-poll P)
+       (message "%S after poll" (pq-pgconn P 'pq::status))
+       (if (eq (pq-pgconn P 'pq::status) 'pg::connection-ok)
+           (message "Done!")
+         (add-timeout .1 'database-poller P)))
+          => database-poller
+     (progn
+       (setq P (pq-connect-start ""))
+       (add-timeout .1 'database-poller P))
+          => pg::connection-started before poll
+          => pg::connection-made after poll
+          => pg::connection-made before poll
+          => pg::connection-awaiting-response after poll
+          => pg::connection-awaiting-response before poll
+          => pg::connection-auth-ok after poll
+          => pg::connection-auth-ok before poll
+          => pg::connection-setenv after poll
+          => pg::connection-setenv before poll
+          => pg::connection-ok after poll
+          => Done!
+     P
+          => #<PGconn localhost:25432 steve/steve>
+
+   Here is an example of one method of doing an asynchronous reset.
+
+     (defun database-poller (P)
+       (let (PS)
+         (message "%S before poll" (pq-pgconn P 'pq::status))
+         (setq PS (pq-reset-poll P))
+         (message "%S after poll [%S]" (pq-pgconn P 'pq::status) PS)
+         (if (eq (pq-pgconn P 'pq::status) 'pg::connection-ok)
+       (message "Done!")
+           (add-timeout .1 'database-poller P))))
+          => database-poller
+     (progn
+       (pq-reset-start P)
+       (add-timeout .1 'database-poller P))
+          => pg::connection-started before poll
+          => pg::connection-made after poll [pgres::polling-writing]
+          => pg::connection-made before poll
+          => pg::connection-awaiting-response after poll [pgres::polling-reading]
+          => pg::connection-awaiting-response before poll
+          => pg::connection-setenv after poll [pgres::polling-reading]
+          => pg::connection-setenv before poll
+          => pg::connection-ok after poll [pgres::polling-ok]
+          => Done!
+     P
+          => #<PGconn localhost:25432 steve/steve>
+
+   And finally, an asynchronous query.
+
+     (defun database-poller (P)
+       (let (R)
+         (pq-consume-input P)
+         (if (pq-is-busy P)
+       (add-timeout .1 'database-poller P)
+           (setq R (pq-get-result P))
+           (if R
+         (progn
+           (push R result-list)
+           (add-timeout .1 'database-poller P))))))
+          => database-poller
+     (when (pq-send-query P "SELECT * FROM xemacs_test;")
+       (setq result-list nil)
+       (add-timeout .1 'database-poller P))
+          => 885
+     ;; wait a moment
+     result-list
+          => (#<PGresult PGRES_TUPLES_OK - SELECT>)
+
+   Here is an example showing how multiple SQL statements in a single
+query can have all their results collected.
+     ;; Using the same `database-poller' function from the previous example
+     (when (pq-send-query P "SELECT * FROM xemacs_test;
+     SELECT * FROM pg_database;
+     SELECT * FROM pg_user;")
+       (setq result-list nil)
+       (add-timeout .1 'database-poller P))
+          => 1782
+     ;; wait a moment
+     result-list
+          => (#<PGresult PGRES_TUPLES_OK - SELECT> #<PGresult PGRES_TUPLES_OK - SELECT> #<PGresult PGRES_TUPLES_OK - SELECT>)
+
+   Here is an example which illustrates collecting all data from a
+query, including the field names.
+
+     (defun pg-util-query-results (results)
+       "Retrieve results of last SQL query into a list structure."
+       (let ((i (1- (pq-ntuples R)))
+       j l1 l2)
+         (while (>= i 0)
+           (setq j (1- (pq-nfields R)))
+           (setq l2 nil)
+           (while (>= j 0)
+       (push (pq-get-value R i j) l2)
+       (decf j))
+           (push l2 l1)
+           (decf i))
+         (setq j (1- (pq-nfields R)))
+         (setq l2 nil)
+         (while (>= j 0)
+           (push (pq-fname R j) l2)
+           (decf j))
+         (push l2 l1)
+         l1))
+          => pg-util-query-results
+     (setq R (pq-exec P "SELECT * FROM xemacs_test ORDER BY field2 DESC;"))
+          => #<PGresult PGRES_TUPLES_OK - SELECT>
+     (pg-util-query-results R)
+          => (("f1" "field2") ("a" "97") ("b" "97") ("stuff" "42") ("a string" "12") ("foo" "10") ("string" "2") ("text" "1"))
+
+   Here is an example of a query that uses a database cursor.
+
+     (let (data R)
+       (setq R (pq-exec P "BEGIN;"))
+       (setq R (pq-exec P "DECLARE k_cursor CURSOR FOR SELECT * FROM xemacs_test ORDER BY f1 DESC;"))
+     
+       (setq R (pq-exec P "FETCH k_cursor;"))
+       (while (eq (pq-ntuples R) 1)
+         (push (list (pq-get-value R 0 0) (pq-get-value R 0 1)) data)
+         (setq R (pq-exec P "FETCH k_cursor;")))
+       (setq R (pq-exec P "END;"))
+       data)
+          => (("a" "97") ("a string" "12") ("b" "97") ("foo" "10") ("string" "2") ("stuff" "42") ("text" "1"))
+
+   Here's another example of cursors, this time with a Lisp macro to
+implement a mapping function over a table.
+
+     (defmacro map-db (P table condition callout)
+       `(let (R)
+          (pq-exec ,P "BEGIN;")
+          (pq-exec ,P (concat "DECLARE k_cursor CURSOR FOR SELECT * FROM "
+                        ,table
+                        " "
+                        ,condition
+                        " ORDER BY f1 DESC;"))
+          (setq R (pq-exec P "FETCH k_cursor;"))
+          (while (eq (pq-ntuples R) 1)
+            (,callout (pq-get-value R 0 0) (pq-get-value R 0 1))
+            (setq R (pq-exec P "FETCH k_cursor;")))
+          (pq-exec P "END;")))
+          => map-db
+     (defun callback (arg1 arg2)
+       (message "arg1 = %s, arg2 = %s" arg1 arg2))
+          => callback
+     (map-db P "xemacs_test" "WHERE field2 > 10" callback)
+          => arg1 = stuff, arg2 = 42
+          => arg1 = b, arg2 = 97
+          => arg1 = a string, arg2 = 12
+          => arg1 = a, arg2 = 97
+          => #<PGresult PGRES_COMMAND_OK - COMMIT>
+
+\1f
+File: lispref.info,  Node: Internationalization,  Next: MULE,  Prev: PostgreSQL Support,  Up: Top
+
+Internationalization
+********************
+
+* Menu:
+
+* I18N Levels 1 and 2:: Support for different time, date, and currency formats.
+* I18N Level 3::        Support for localized messages.
+* I18N Level 4::        Support for Asian languages.
+
+\1f
+File: lispref.info,  Node: I18N Levels 1 and 2,  Next: I18N Level 3,  Up: Internationalization
+
+I18N Levels 1 and 2
+===================
+
+   XEmacs is now compliant with I18N levels 1 and 2.  Specifically,
+this means that it is 8-bit clean and correctly handles time and date
+functions.  XEmacs will correctly display the entire ISO-Latin 1
+character set.
+
+   The compose key may now be used to create any character in the
+ISO-Latin 1 character set not directly available via the keyboard..  In
+order for the compose key to work it is necessary to load the file
+`x-compose.el'.  At any time while composing a character, `C-h' will
+display all valid completions and the character which would be produced.
+
+\1f
+File: lispref.info,  Node: I18N Level 3,  Next: I18N Level 4,  Prev: I18N Levels 1 and 2,  Up: Internationalization
+
+I18N Level 3
+============
+
+* Menu:
+
+* Level 3 Basics::
+* Level 3 Primitives::
+* Dynamic Messaging::
+* Domain Specification::
+* Documentation String Extraction::
+
+\1f
+File: lispref.info,  Node: Level 3 Basics,  Next: Level 3 Primitives,  Up: I18N Level 3
+
+Level 3 Basics
+--------------
+
+   XEmacs now provides alpha-level functionality for I18N Level 3.
+This means that everything necessary for full messaging is available,
+but not every file has been converted.
+
+   The two message files which have been created are `src/emacs.po' and
+`lisp/packages/mh-e.po'.  Both files need to be converted using
+`msgfmt', and the resulting `.mo' files placed in some locale's
+`LC_MESSAGES' directory.  The test "translations" in these files are
+the original messages prefixed by `TRNSLT_'.
+
+   The domain for a variable is stored on the variable's property list
+under the property name VARIABLE-DOMAIN.  The function
+`documentation-property' uses this information when translating a
+variable's documentation.
+
+\1f
+File: lispref.info,  Node: Level 3 Primitives,  Next: Dynamic Messaging,  Prev: Level 3 Basics,  Up: I18N Level 3
+
+Level 3 Primitives
+------------------
+
+ - Function: gettext string
+     This function looks up STRING in the default message domain and
+     returns its translation.  If `I18N3' was not enabled when XEmacs
+     was compiled, it just returns STRING.
+
+ - Function: dgettext domain string
+     This function looks up STRING in the specified message domain and
+     returns its translation.  If `I18N3' was not enabled when XEmacs
+     was compiled, it just returns STRING.
+
+ - Function: bind-text-domain domain pathname
+     This function associates a pathname with a message domain.  Here's
+     how the path to message file is constructed under SunOS 5.x:
+
+          `{pathname}/{LANG}/LC_MESSAGES/{domain}.mo'
+
+     If `I18N3' was not enabled when XEmacs was compiled, this function
+     does nothing.
+
+ - Special Form: domain string
+     This function specifies the text domain used for translating
+     documentation strings and interactive prompts of a function.  For
+     example, write:
+
+          (defun foo (arg) "Doc string" (domain "emacs-foo") ...)
+
+     to specify `emacs-foo' as the text domain of the function `foo'.
+     The "call" to `domain' is actually a declaration rather than a
+     function; when actually called, `domain' just returns `nil'.
+
+ - Function: domain-of function
+     This function returns the text domain of FUNCTION; it returns
+     `nil' if it is the default domain.  If `I18N3' was not enabled
+     when XEmacs was compiled, it always returns `nil'.
+
+\1f
+File: lispref.info,  Node: Dynamic Messaging,  Next: Domain Specification,  Prev: Level 3 Primitives,  Up: I18N Level 3
+
+Dynamic Messaging
+-----------------
+
+   The `format' function has been extended to permit you to change the
+order of parameter insertion.  For example, the conversion format
+`%1$s' inserts parameter one as a string, while `%2$s' inserts
+parameter two.  This is useful when creating translations which require
+you to change the word order.
+
+\1f
+File: lispref.info,  Node: Domain Specification,  Next: Documentation String Extraction,  Prev: Dynamic Messaging,  Up: I18N Level 3
+
+Domain Specification
+--------------------
+
+   The default message domain of XEmacs is `emacs'.  For add-on
+packages, it is best to use a different domain.  For example, let us
+say we want to convert the "gorilla" package to use the domain
+`emacs-gorilla'.  To translate the message "What gorilla?", use
+`dgettext' as follows:
+
+     (dgettext "emacs-gorilla" "What gorilla?")
+
+   A function (or macro) which has a documentation string or an
+interactive prompt needs to be associated with the domain in order for
+the documentation or prompt to be translated.  This is done with the
+`domain' special form as follows:
+
+     (defun scratch (location)
+       "Scratch the specified location."
+       (domain "emacs-gorilla")
+       (interactive "sScratch: ")
+       ... )
+
+   It is most efficient to specify the domain in the first line of the
+function body, before the `interactive' form.
+
+   For variables and constants which have documentation strings,
+specify the domain after the documentation.
+
+ - Special Form: defvar symbol [value [doc-string [domain]]]
+     Example:
+          (defvar weight 250 "Weight of gorilla, in pounds." "emacs-gorilla")
+
+ - Special Form: defconst symbol [value [doc-string [domain]]]
+     Example:
+          (defconst limbs 4 "Number of limbs" "emacs-gorilla")
+
+   Autoloaded functions which are specified in `loaddefs.el' do not need
+to have a domain specification, because their documentation strings are
+extracted into the main message base.  However, for autoloaded functions
+which are specified in a separate package, use following syntax:
+
+ - Function: autoload symbol filename &optional docstring interactive
+          macro domain
+     Example:
+          (autoload 'explore "jungle" "Explore the jungle." nil nil "emacs-gorilla")
+
+\1f
+File: lispref.info,  Node: Documentation String Extraction,  Prev: Domain Specification,  Up: I18N Level 3
+
+Documentation String Extraction
+-------------------------------
+
+   The utility `etc/make-po' scans the file `DOC' to extract
+documentation strings and creates a message file `doc.po'.  This file
+may then be inserted within `emacs.po'.
+
+   Currently, `make-po' is hard-coded to read from `DOC' and write to
+`doc.po'.  In order to extract documentation strings from an add-on
+package, first run `make-docfile' on the package to produce the `DOC'
+file.  Then run `make-po -p' with the `-p' argument to indicate that we
+are extracting documentation for an add-on package.
+
+   (The `-p' argument is a kludge to make up for a subtle difference
+between pre-loaded documentation and add-on documentation:  For add-on
+packages, the final carriage returns in the strings produced by
+`make-docfile' must be ignored.)
+
+\1f
+File: lispref.info,  Node: I18N Level 4,  Prev: I18N Level 3,  Up: Internationalization
+
+I18N Level 4
+============
+
+   The Asian-language support in XEmacs is called "MULE".  *Note MULE::.
+
+\1f
+File: lispref.info,  Node: MULE,  Next: Tips,  Prev: Internationalization,  Up: Top
+
+MULE
+****
+
+   "MULE" is the name originally given to the version of GNU Emacs
+extended for multi-lingual (and in particular Asian-language) support.
+"MULE" is short for "MUlti-Lingual Emacs".  It is an extension and
+complete rewrite of Nemacs ("Nihon Emacs" where "Nihon" is the Japanese
+word for "Japan"), which only provided support for Japanese.  XEmacs
+refers to its multi-lingual support as "MULE support" since it is based
+on "MULE".
+
+* Menu:
+
+* Internationalization Terminology::
+                        Definition of various internationalization terms.
+* Charsets::            Sets of related characters.
+* MULE Characters::     Working with characters in XEmacs/MULE.
+* Composite Characters:: Making new characters by overstriking other ones.
+* Coding Systems::      Ways of representing a string of chars using integers.
+* CCL::                 A special language for writing fast converters.
+* Category Tables::     Subdividing charsets into groups.
+
+\1f
  File: lispref.info,  Node: Internationalization Terminology,  Next: Charsets,  Up: MULE
  
  Internationalization Terminology
@@ -218,7 +725,7 @@ encoded by doing the same but also prefixing the character with the
  byte 0x8F.
  
     The advantage of a modal encoding is that it is generally more
-space-efficient, and is easily extendable because there are essentially
+space-efficient, and is easily extendible because there are essentially
  an arbitrary number of escape sequences that can be created.  The
  disadvantage, however, is that it is much more difficult to work with
  if it is not being processed in a sequential manner.  In the non-modal
@@ -639,396 +1146,3 @@ charsets work.)
                                          encodings.
  * Predefined Coding Systems::         Coding systems implemented by MULE.
  
-\1f
-File: lispref.info,  Node: Coding System Types,  Next: ISO 2022,  Up: Coding Systems
-
-Coding System Types
--------------------
-
-   The coding system type determines the basic algorithm XEmacs will
-use to decode or encode a data stream.  Character encodings will be
-converted to the MULE encoding, escape sequences processed, and newline
-sequences converted to XEmacs's internal representation.  There are
-three basic classes of coding system type: no-conversion, ISO-2022, and
-special.
-
-   No conversion allows you to look at the file's internal
-representation.  Since XEmacs is basically a text editor, "no
-conversion" does convert newline conventions by default.  (Use the
-'binary coding-system if this is not desired.)
-
-   ISO 2022 (*note ISO 2022::) is the basic international standard
-regulating use of "coded character sets for the exchange of data", ie,
-text streams.  ISO 2022 contains functions that make it possible to
-encode text streams to comply with restrictions of the Internet mail
-system and de facto restrictions of most file systems (eg, use of the
-separator character in file names).  Coding systems which are not ISO
-2022 conformant can be difficult to handle.  Perhaps more important,
-they are not adaptable to multilingual information interchange, with
-the obvious exception of ISO 10646 (Unicode).  (Unicode is partially
-supported by XEmacs with the addition of the Lisp package ucs-conv.)
-
-   The special class of coding systems includes automatic detection,
-CCL (a "little language" embedded as an interpreter, useful for
-translating between variants of a single character set),
-non-ISO-2022-conformant encodings like Unicode, Shift JIS, and Big5,
-and MULE internal coding.  (NB: this list is based on XEmacs 21.2.
-Terminology may vary slightly for other versions of XEmacs and for GNU
-Emacs 20.)
-
-`no-conversion'
-     No conversion, for binary files, and a few special cases of
-     non-ISO-2022 coding systems where conversion is done by hook
-     functions (usually implemented in CCL).  On output, graphic
-     characters that are not in ASCII or Latin-1 will be replaced by a
-     `?'. (For a no-conversion-encoded buffer, these characters will
-     only be present if you explicitly insert them.)
-
-`iso2022'
-     Any ISO-2022-compliant encoding.  Among others, this includes JIS
-     (the Japanese encoding commonly used for e-mail), national
-     variants of EUC (the standard Unix encoding for Japanese and other
-     languages), and Compound Text (an encoding used in X11).  You can
-     specify more specific information about the conversion with the
-     FLAGS argument.
-
-`ucs-4'
-     ISO 10646 UCS-4 encoding.  A 31-bit fixed-width superset of
-     Unicode.
-
-`utf-8'
-     ISO 10646 UTF-8 encoding.  A "file system safe" transformation
-     format that can be used with both UCS-4 and Unicode.
-
-`undecided'
-     Automatic conversion.  XEmacs attempts to detect the coding system
-     used in the file.
-
-`shift-jis'
-     Shift-JIS (a Japanese encoding commonly used in PC operating
-     systems).
-
-`big5'
-     Big5 (the encoding commonly used for Taiwanese).
-
-`ccl'
-     The conversion is performed using a user-written pseudo-code
-     program.  CCL (Code Conversion Language) is the name of this
-     pseudo-code.  For example, CCL is used to map KOI8-R characters
-     (an encoding for Russian Cyrillic) to ISO8859-5 (the form used
-     internally by MULE).
-
-`internal'
-     Write out or read in the raw contents of the memory representing
-     the buffer's text.  This is primarily useful for debugging
-     purposes, and is only enabled when XEmacs has been compiled with
-     `DEBUG_XEMACS' set (the `--debug' configure option).  *Warning*:
-     Reading in a file using `internal' conversion can result in an
-     internal inconsistency in the memory representing a buffer's text,
-     which will produce unpredictable results and may cause XEmacs to
-     crash.  Under normal circumstances you should never use `internal'
-     conversion.
-
-\1f
-File: lispref.info,  Node: ISO 2022,  Next: EOL Conversion,  Prev: Coding System Types,  Up: Coding Systems
-
-ISO 2022
-========
-
-   This section briefly describes the ISO 2022 encoding standard.  A
-more thorough treatment is available in the original document of ISO
-2022 as well as various national standards (such as JIS X 0202).
-
-   Character sets ("charsets") are classified into the following four
-categories, according to the number of characters in the charset:
-94-charset, 96-charset, 94x94-charset, and 96x96-charset.  This means
-that although an ISO 2022 coding system may have variable width
-characters, each charset used is fixed-width (in contrast to the MULE
-character set and UTF-8, for example).
-
-   ISO 2022 provides for switching between character sets via escape
-sequences.  This switching is somewhat complicated, because ISO 2022
-provides for both legacy applications like Internet mail that accept
-only 7 significant bits in some contexts (RFC 822 headers, for example),
-and more modern "8-bit clean" applications.  It also provides for
-compact and transparent representation of languages like Japanese which
-mix ASCII and a national script (even outside of computer programs).
-
-   First, ISO 2022 codified prevailing practice by dividing the code
-space into "control" and "graphic" regions.  The code points 0x00-0x1F
-and 0x80-0x9F are reserved for "control characters", while "graphic
-characters" must be assigned to code points in the regions 0x20-0x7F and
-0xA0-0xFF.  The positions 0x20 and 0x7F are special, and under some
-circumstances must be assigned the graphic character "ASCII SPACE" and
-the control character "ASCII DEL" respectively.
-
-   The various regions are given the name C0 (0x00-0x1F), GL
-(0x20-0x7F), C1 (0x80-0x9F), and GR (0xA0-0xFF).  GL and GR stand for
-"graphic left" and "graphic right", respectively, because of the
-standard method of displaying graphic character sets in tables with the
-high byte indexing columns and the low byte indexing rows.  I don't
-find it very intuitive, but these are called "registers".
-
-   An ISO 2022-conformant encoding for a graphic character set must use
-a fixed number of bytes per character, and the values must fit into a
-single register; that is, each byte must range over either 0x20-0x7F, or
-0xA0-0xFF.  It is not allowed to extend the range of the repertoire of a
-character set by using both ranges at the same.  This is why a standard
-character set such as ISO 8859-1 is actually considered by ISO 2022 to
-be an aggregation of two character sets, ASCII and LATIN-1, and why it
-is technically incorrect to refer to ISO 8859-1 as "Latin 1".  Also, a
-single character's bytes must all be drawn from the same register; this
-is why Shift JIS (for Japanese) and Big 5 (for Chinese) are not ISO
-2022-compatible encodings.
-
-   The reason for this restriction becomes clear when you attempt to
-define an efficient, robust encoding for a language like Japanese.
-Like ISO 8859, Japanese encodings are aggregations of several character
-sets.  In practice, the vast majority of characters are drawn from the
-"JIS Roman" character set (a derivative of ASCII; it won't hurt to
-think of it as ASCII) and the JIS X 0208 standard "basic Japanese"
-character set including not only ideographic characters ("kanji") but
-syllabic Japanese characters ("kana"), a wide variety of symbols, and
-many alphabetic characters (Roman, Greek, and Cyrillic) as well.
-Although JIS X 0208 includes the whole Roman alphabet, as a 2-byte code
-it is not suited to programming; thus the inclusion of ASCII in the
-standard Japanese encodings.
-
-   For normal Japanese text such as in newspapers, a broad repertoire of
-approximately 3000 characters is used.  Evidently this won't fit into
-one byte; two must be used.  But much of the text processed by Japanese
-computers is computer source code, nearly all of which is ASCII.  A not
-insignificant portion of ordinary text is English (as such or as
-borrowed Japanese vocabulary) or other languages which can represented
-at least approximately in ASCII, as well.  It seems reasonable then to
-represent ASCII in one byte, and JIS X 0208 in two.  And this is exactly
-what the Extended Unix Code for Japanese (EUC-JP) does.  ASCII is
-invoked to the GL register, and JIS X 0208 is invoked to the GR
-register.  Thus, each byte can be tested for its character set by
-looking at the high bit; if set, it is Japanese, if clear, it is ASCII.
-Furthermore, since control characters like newline can never be part of
-a graphic character, even in the case of corruption in transmission the
-stream will be resynchronized at every line break, on the order of 60-80
-bytes.  This coding system requires no escape sequences or special
-control codes to represent 99.9% of all Japanese text.
-
-   Note carefully the distinction between the character sets (ASCII and
-JIS X 0208), the encoding (EUC-JP), and the coding system (ISO 2022).
-The JIS X 0208 character set is used in three different encodings for
-Japanese, but in ISO-2022-JP it is invoked into GL (so the high bit is
-always clear), in EUC-JP it is invoked into GR (setting the high bit in
-the process), and in Shift JIS the high bit may be set or reset, and the
-significant bits are shifted within the 16-bit character so that the two
-main character sets can coexist with a third (the "halfwidth katakana"
-of JIS X 0201).  As the name implies, the ISO-2022-JP encoding is also a
-version of the ISO-2022 coding system.
-
-   In order to systematically treat subsidiary character sets (like the
-"halfwidth katakana" already mentioned, and the "supplementary kanji" of
-JIS X 0212), four further registers are defined: G0, G1, G2, and G3.
-Unlike GL and GR, they are not logically distinguished by internal
-format.  Instead, the process of "invocation" mentioned earlier is
-broken into two steps: first, a character set is "designated" to one of
-the registers G0-G3 by use of an "escape sequence" of the form:
-
-             ESC [I] I F
-
-   where I is an intermediate character or characters in the range 0x20
-- 0x3F, and F, from the range 0x30-0x7Fm is the final character
-identifying this charset.  (Final characters in the range 0x30-0x3F are
-reserved for private use and will never have a publically registered
-meaning.)
-
-   Then that register is "invoked" to either GL or GR, either
-automatically (designations to G0 normally involve invocation to GL as
-well), or by use of shifting (affecting only the following character in
-the data stream) or locking (effective until the next designation or
-locking) control sequences.  An encoding conformant to ISO 2022 is
-typically defined by designating the initial contents of the G0-G3
-registers, specifying an 7 or 8 bit environment, and specifying whether
-further designations will be recognized.
-
-   Some examples of character sets and the registered final characters
-F used to designate them:
-
-94-charset
-     ASCII (B), left (J) and right (I) half of JIS X 0201, ...
-
-96-charset
-     Latin-1 (A), Latin-2 (B), Latin-3 (C), ...
-
-94x94-charset
-     GB2312 (A), JIS X 0208 (B), KSC5601 (C), ...
-
-96x96-charset
-     none for the moment
-
-   The meanings of the various characters in these sequences, where not
-specified by the ISO 2022 standard (such as the ESC character), are
-assigned by "ECMA", the European Computer Manufacturers Association.
-
-   The meaning of intermediate characters are:
-
-             $ [0x24]: indicate charset of dimension 2 (94x94 or 96x96).
-             ( [0x28]: designate to G0 a 94-charset whose final byte is F.
-             ) [0x29]: designate to G1 a 94-charset whose final byte is F.
-             * [0x2A]: designate to G2 a 94-charset whose final byte is F.
-             + [0x2B]: designate to G3 a 94-charset whose final byte is F.
-             , [0x2C]: designate to G0 a 96-charset whose final byte is F.
-             - [0x2D]: designate to G1 a 96-charset whose final byte is F.
-             . [0x2E]: designate to G2 a 96-charset whose final byte is F.
-             / [0x2F]: designate to G3 a 96-charset whose final byte is F.
-
-   The comma may be used in files read and written only by MULE, as a
-MULE extension, but this is illegal in ISO 2022.  (The reason is that
-in ISO 2022 G0 must be a 94-member character set, with 0x20 assigned
-the value SPACE, and 0x7F assigned the value DEL.)
-
-   Here are examples of designations:
-
-             ESC ( B :              designate to G0 ASCII
-             ESC - A :              designate to G1 Latin-1
-             ESC $ ( A or ESC $ A : designate to G0 GB2312
-             ESC $ ( B or ESC $ B : designate to G0 JISX0208
-             ESC $ ) C :            designate to G1 KSC5601
-
-   (The short forms used to designate GB2312 and JIS X 0208 are for
-backwards compatibility; the long forms are preferred.)
-
-   To use a charset designated to G2 or G3, and to use a charset
-designated to G1 in a 7-bit environment, you must explicitly invoke G1,
-G2, or G3 into GL.  There are two types of invocation, Locking Shift
-(forever) and Single Shift (one character only).
-
-   Locking Shift is done as follows:
-
-             LS0 or SI (0x0F): invoke G0 into GL
-             LS1 or SO (0x0E): invoke G1 into GL
-             LS2:  invoke G2 into GL
-             LS3:  invoke G3 into GL
-             LS1R: invoke G1 into GR
-             LS2R: invoke G2 into GR
-             LS3R: invoke G3 into GR
-
-   Single Shift is done as follows:
-
-             SS2 or ESC N: invoke G2 into GL
-             SS3 or ESC O: invoke G3 into GL
-
-   The shift functions (such as LS1R and SS3) are represented by control
-characters (from C1) in 8 bit environments and by escape sequences in 7
-bit environments.
-
-   (#### Ben says: I think the above is slightly incorrect.  It appears
-that SS2 invokes G2 into GR and SS3 invokes G3 into GR, whereas ESC N
-and ESC O behave as indicated.  The above definitions will not parse
-EUC-encoded text correctly, and it looks like the code in mule-coding.c
-has similar problems.)
-
-   Evidently there are a lot of ISO-2022-compliant ways of encoding
-multilingual text.  Now, in the world, there exist many coding systems
-such as X11's Compound Text, Japanese JUNET code, and so-called EUC
-(Extended UNIX Code); all of these are variants of ISO 2022.
-
-   In MULE, we characterize a version of ISO 2022 by the following
-attributes:
-
-  1. The character sets initially designated to G0 thru G3.
-
-  2. Whether short form designations are allowed for Japanese and
-     Chinese.
-
-  3. Whether ASCII should be designated to G0 before control characters.
-
-  4. Whether ASCII should be designated to G0 at the end of line.
-
-  5. 7-bit environment or 8-bit environment.
-
-  6. Whether Locking Shifts are used or not.
-
-  7. Whether to use ASCII or the variant JIS X 0201-1976-Roman.
-
-  8. Whether to use JIS X 0208-1983 or the older version JIS X
-     0208-1976.
-
-   (The last two are only for Japanese.)
-
-   By specifying these attributes, you can create any variant of ISO
-2022.
-
-   Here are several examples:
-
-     ISO-2022-JP -- Coding system used in Japanese email (RFC 1463 #### check).
-             1. G0 <- ASCII, G1..3 <- never used
-             2. Yes.
-             3. Yes.
-             4. Yes.
-             5. 7-bit environment
-             6. No.
-             7. Use ASCII
-             8. Use JIS X 0208-1983
-     
-     ctext -- X11 Compound Text
-             1. G0 <- ASCII, G1 <- Latin-1, G2,3 <- never used.
-             2. No.
-             3. No.
-             4. Yes.
-             5. 8-bit environment.
-             6. No.
-             7. Use ASCII.
-             8. Use JIS X 0208-1983.
-     
-     euc-china -- Chinese EUC.  Often called the "GB encoding", but that is
-     technically incorrect.
-             1. G0 <- ASCII, G1 <- GB 2312, G2,3 <- never used.
-             2. No.
-             3. Yes.
-             4. Yes.
-             5. 8-bit environment.
-             6. No.
-             7. Use ASCII.
-             8. Use JIS X 0208-1983.
-     
-     ISO-2022-KR -- Coding system used in Korean email.
-             1. G0 <- ASCII, G1 <- KSC 5601, G2,3 <- never used.
-             2. No.
-             3. Yes.
-             4. Yes.
-             5. 7-bit environment.
-             6. Yes.
-             7. Use ASCII.
-             8. Use JIS X 0208-1983.
-
-   MULE creates all of these coding systems by default.
-
-\1f
-File: lispref.info,  Node: EOL Conversion,  Next: Coding System Properties,  Prev: ISO 2022,  Up: Coding Systems
-
-EOL Conversion
---------------
-
-`nil'
-     Automatically detect the end-of-line type (LF, CRLF, or CR).  Also
-     generate subsidiary coding systems named `NAME-unix', `NAME-dos',
-     and `NAME-mac', that are identical to this coding system but have
-     an EOL-TYPE value of `lf', `crlf', and `cr', respectively.
-
-`lf'
-     The end of a line is marked externally using ASCII LF.  Since this
-     is also the way that XEmacs represents an end-of-line internally,
-     specifying this option results in no end-of-line conversion.  This
-     is the standard format for Unix text files.
-
-`crlf'
-     The end of a line is marked externally using ASCII CRLF.  This is
-     the standard format for MS-DOS text files.
-
-`cr'
-     The end of a line is marked externally using ASCII CR.  This is the
-     standard format for Macintosh text files.
-
-`t'
-     Automatically detect the end-of-line type but do not generate
-     subsidiary coding systems.  (This value is converted to `nil' when
-     stored internally, and `coding-system-property' will return `nil'.)
-