Resorted; add some missing Morohashi's Daikanwa characters; add

[chise/xemacs-chise.git] / info / lispref.info-43
diff --git a/info/lispref.info-43 b/info/lispref.info-43

index d39722d..b2766a5 100644 (file)
--- a/info/lispref.info-43
+++ b/info/lispref.info-43
@@ -50,6 +50,513 @@ may be included in a translation approved by the Free Software
  Foundation instead of in the original English.
  
  \1f
+File: lispref.info,  Node: Unimplemented libpq Functions,  Prev: Other libpq Functions,  Up: XEmacs PostgreSQL libpq API
+
+Unimplemented libpq Functions
+-----------------------------
+
+ - Unimplemented Function: PGconn *PQsetdbLogin (char *pghost, char
+          *pgport, char *pgoptions, char *pgtty, char *dbName, char
+          *login, char *pwd)
+     Synchronous database connection.  PGHOST is the hostname of the
+     PostgreSQL backend to connect to.  PGPORT is the TCP port number
+     to use.  PGOPTIONS specifies other backend options.  PGTTY
+     specifies the debugging tty to use.  DBNAME specifies the database
+     name to use.  LOGIN specifies the database user name.  PWD
+     specifies the database user's password.
+
+     This routine is deprecated as of libpq-7.0, and its functionality
+     can be replaced by external Lisp code if needed.
+
+ - Unimplemented Function: PGconn *PQsetdb (char *pghost, char *pgport,
+          char *pgoptions, char *pgtty, char *dbName)
+     Synchronous database connection.  PGHOST is the hostname of the
+     PostgreSQL backend to connect to.  PGPORT is the TCP port number
+     to use.  PGOPTIONS specifies other backend options.  PGTTY
+     specifies the debugging tty to use.  DBNAME specifies the database
+     name to use.
+
+     This routine was deprecated in libpq-6.5.
+
+ - Unimplemented Function: int PQsocket (PGconn *conn)
+     Return socket file descriptor to a backend database process.  CONN
+     database connection object.
+
+ - Unimplemented Function: void PQprint (FILE *fout, PGresult *res,
+          PGprintOpt *ps)
+     Print out the results of a query to a designated C stream.  FOUT C
+     stream to print to.  RES the query result object to print.  PS the
+     print options structure.
+
+     This routine is deprecated as of libpq-7.0 and cannot be sensibly
+     exported to XEmacs Lisp.
+
+ - Unimplemented Function: void PQdisplayTuples (PGresult *res, FILE
+          *fp, int fillAlign, char *fieldSep, int printHeader, int
+          quiet)
+     RES query result object to print.  FP C stream to print to.
+     FILLALIGN pad the fields with spaces.  FIELDSEP field separator.
+     PRINTHEADER display headers?  QUIET
+
+     This routine was deprecated in libpq-6.5.
+
+ - Unimplemented Function: void PQprintTuples (PGresult *res, FILE
+          *fout, int printAttName, int terseOutput, int width)
+     RES query result object to print.  FOUT C stream to print to.
+     PRINTATTNAME print attribute names.  TERSEOUTPUT delimiter bars.
+     WIDTH width of column, if 0, use variable width.
+
+     This routine was deprecated in libpq-6.5.
+
+ - Unimplemented Function: int PQmblen (char *s, int encoding)
+     Determine length of a multibyte encoded char at `*s'.  S encoded
+     string.  ENCODING type of encoding.
+
+     Compatibility note:  This function was introduced in libpq-7.0.
+
+ - Unimplemented Function: void PQtrace (PGconn *conn, FILE *debug_port)
+     Enable tracing on `debug_port'.  CONN database connection object.
+     DEBUG_PORT C output stream to use.
+
+ - Unimplemented Function: void PQuntrace (PGconn *conn)
+     Disable tracing.  CONN database connection object.
+
+ - Unimplemented Function: char *PQoidStatus (PGconn *conn)
+     Return the object id as a string of the last tuple inserted.  CONN
+     database connection object.
+
+     Compatibility note: This function is deprecated in libpq-7.0,
+     however it is used internally by the XEmacs binding code when
+     linked against versions prior to 7.0.
+
+ - Unimplemented Function: PGresult *PQfn (PGconn *conn, int fnid, int
+          *result_buf, int *result_len, int result_is_int, PQArgBlock
+          *args, int nargs)
+     "Fast path" interface -- not really recommended for application use.
+     CONN A database connection object.  FNID RESULT_BUF RESULT_LEN
+     RESULT_IS_INT ARGS NARGS
+
+   The following set of very low level large object functions aren't
+appropriate to be exported to Lisp.
+
+ - Unimplemented Function: int pq-lo-open (PGconn *conn, int lobjid,
+          int mode)
+     CONN a database connection object.  LOBJID a large object ID.
+     MODE opening modes.
+
+ - Unimplemented Function: int pq-lo-close (PGconn *conn, int fd)
+     CONN a database connection object.  FD a large object file
+     descriptor.
+
+ - Unimplemented Function: int pq-lo-read (PGconn *conn, int fd, char
+          *buf, int len)
+     CONN a database connection object.  FD a large object file
+     descriptor.  BUF buffer to read into.  LEN size of buffer.
+
+ - Unimplemented Function: int pq-lo-write (PGconn *conn, int fd, char
+          *buf, size_t len)
+     CONN a database connection object.  FD a large object file
+     descriptor.  BUF buffer to write from.  LEN size of buffer.
+
+ - Unimplemented Function: int pq-lo-lseek (PGconn *conn, int fd, int
+          offset, int whence)
+     CONN a database connection object.  FD a large object file
+     descriptor.  OFFSET WHENCE
+
+ - Unimplemented Function: int pq-lo-creat (PGconn *conn, int mode)
+     CONN a database connection object.  MODE opening modes.
+
+ - Unimplemented Function: int pq-lo-tell (PGconn *conn, int fd)
+     CONN a database connection object.  FD a large object file
+     descriptor.
+
+ - Unimplemented Function: int pq-lo-unlink (PGconn *conn, int lobjid)
+     CONN a database connection object.  LOBJID a large object ID.
+
+\1f
+File: lispref.info,  Node: XEmacs PostgreSQL libpq Examples,  Prev: XEmacs PostgreSQL libpq API,  Up: PostgreSQL Support
+
+XEmacs PostgreSQL libpq Examples
+================================
+
+   This is an example of one method of establishing an asynchronous
+connection.
+
+     (defun database-poller (P)
+       (message "%S before poll" (pq-pgconn P 'pq::status))
+       (pq-connect-poll P)
+       (message "%S after poll" (pq-pgconn P 'pq::status))
+       (if (eq (pq-pgconn P 'pq::status) 'pg::connection-ok)
+           (message "Done!")
+         (add-timeout .1 'database-poller P)))
+          => database-poller
+     (progn
+       (setq P (pq-connect-start ""))
+       (add-timeout .1 'database-poller P))
+          => pg::connection-started before poll
+          => pg::connection-made after poll
+          => pg::connection-made before poll
+          => pg::connection-awaiting-response after poll
+          => pg::connection-awaiting-response before poll
+          => pg::connection-auth-ok after poll
+          => pg::connection-auth-ok before poll
+          => pg::connection-setenv after poll
+          => pg::connection-setenv before poll
+          => pg::connection-ok after poll
+          => Done!
+     P
+          => #<PGconn localhost:25432 steve/steve>
+
+   Here is an example of one method of doing an asynchronous reset.
+
+     (defun database-poller (P)
+       (let (PS)
+         (message "%S before poll" (pq-pgconn P 'pq::status))
+         (setq PS (pq-reset-poll P))
+         (message "%S after poll [%S]" (pq-pgconn P 'pq::status) PS)
+         (if (eq (pq-pgconn P 'pq::status) 'pg::connection-ok)
+       (message "Done!")
+           (add-timeout .1 'database-poller P))))
+          => database-poller
+     (progn
+       (pq-reset-start P)
+       (add-timeout .1 'database-poller P))
+          => pg::connection-started before poll
+          => pg::connection-made after poll [pgres::polling-writing]
+          => pg::connection-made before poll
+          => pg::connection-awaiting-response after poll [pgres::polling-reading]
+          => pg::connection-awaiting-response before poll
+          => pg::connection-setenv after poll [pgres::polling-reading]
+          => pg::connection-setenv before poll
+          => pg::connection-ok after poll [pgres::polling-ok]
+          => Done!
+     P
+          => #<PGconn localhost:25432 steve/steve>
+
+   And finally, an asynchronous query.
+
+     (defun database-poller (P)
+       (let (R)
+         (pq-consume-input P)
+         (if (pq-is-busy P)
+       (add-timeout .1 'database-poller P)
+           (setq R (pq-get-result P))
+           (if R
+         (progn
+           (push R result-list)
+           (add-timeout .1 'database-poller P))))))
+          => database-poller
+     (when (pq-send-query P "SELECT * FROM xemacs_test;")
+       (setq result-list nil)
+       (add-timeout .1 'database-poller P))
+          => 885
+     ;; wait a moment
+     result-list
+          => (#<PGresult PGRES_TUPLES_OK - SELECT>)
+
+   Here is an example showing how multiple SQL statements in a single
+query can have all their results collected.
+     ;; Using the same `database-poller' function from the previous example
+     (when (pq-send-query P "SELECT * FROM xemacs_test;
+     SELECT * FROM pg_database;
+     SELECT * FROM pg_user;")
+       (setq result-list nil)
+       (add-timeout .1 'database-poller P))
+          => 1782
+     ;; wait a moment
+     result-list
+          => (#<PGresult PGRES_TUPLES_OK - SELECT> #<PGresult PGRES_TUPLES_OK - SELECT> #<PGresult PGRES_TUPLES_OK - SELECT>)
+
+   Here is an example which illustrates collecting all data from a
+query, including the field names.
+
+     (defun pg-util-query-results (results)
+       "Retrieve results of last SQL query into a list structure."
+       (let ((i (1- (pq-ntuples R)))
+       j l1 l2)
+         (while (>= i 0)
+           (setq j (1- (pq-nfields R)))
+           (setq l2 nil)
+           (while (>= j 0)
+       (push (pq-get-value R i j) l2)
+       (decf j))
+           (push l2 l1)
+           (decf i))
+         (setq j (1- (pq-nfields R)))
+         (setq l2 nil)
+         (while (>= j 0)
+           (push (pq-fname R j) l2)
+           (decf j))
+         (push l2 l1)
+         l1))
+          => pg-util-query-results
+     (setq R (pq-exec P "SELECT * FROM xemacs_test ORDER BY field2 DESC;"))
+          => #<PGresult PGRES_TUPLES_OK - SELECT>
+     (pg-util-query-results R)
+          => (("f1" "field2") ("a" "97") ("b" "97") ("stuff" "42") ("a string" "12") ("foo" "10") ("string" "2") ("text" "1"))
+
+   Here is an example of a query that uses a database cursor.
+
+     (let (data R)
+       (setq R (pq-exec P "BEGIN;"))
+       (setq R (pq-exec P "DECLARE k_cursor CURSOR FOR SELECT * FROM xemacs_test ORDER BY f1 DESC;"))
+     
+       (setq R (pq-exec P "FETCH k_cursor;"))
+       (while (eq (pq-ntuples R) 1)
+         (push (list (pq-get-value R 0 0) (pq-get-value R 0 1)) data)
+         (setq R (pq-exec P "FETCH k_cursor;")))
+       (setq R (pq-exec P "END;"))
+       data)
+          => (("a" "97") ("a string" "12") ("b" "97") ("foo" "10") ("string" "2") ("stuff" "42") ("text" "1"))
+
+   Here's another example of cursors, this time with a Lisp macro to
+implement a mapping function over a table.
+
+     (defmacro map-db (P table condition callout)
+       `(let (R)
+          (pq-exec ,P "BEGIN;")
+          (pq-exec ,P (concat "DECLARE k_cursor CURSOR FOR SELECT * FROM "
+                        ,table
+                        " "
+                        ,condition
+                        " ORDER BY f1 DESC;"))
+          (setq R (pq-exec P "FETCH k_cursor;"))
+          (while (eq (pq-ntuples R) 1)
+            (,callout (pq-get-value R 0 0) (pq-get-value R 0 1))
+            (setq R (pq-exec P "FETCH k_cursor;")))
+          (pq-exec P "END;")))
+          => map-db
+     (defun callback (arg1 arg2)
+       (message "arg1 = %s, arg2 = %s" arg1 arg2))
+          => callback
+     (map-db P "xemacs_test" "WHERE field2 > 10" callback)
+          => arg1 = stuff, arg2 = 42
+          => arg1 = b, arg2 = 97
+          => arg1 = a string, arg2 = 12
+          => arg1 = a, arg2 = 97
+          => #<PGresult PGRES_COMMAND_OK - COMMIT>
+
+\1f
+File: lispref.info,  Node: Internationalization,  Next: MULE,  Prev: PostgreSQL Support,  Up: Top
+
+Internationalization
+********************
+
+* Menu:
+
+* I18N Levels 1 and 2:: Support for different time, date, and currency formats.
+* I18N Level 3::        Support for localized messages.
+* I18N Level 4::        Support for Asian languages.
+
+\1f
+File: lispref.info,  Node: I18N Levels 1 and 2,  Next: I18N Level 3,  Up: Internationalization
+
+I18N Levels 1 and 2
+===================
+
+   XEmacs is now compliant with I18N levels 1 and 2.  Specifically,
+this means that it is 8-bit clean and correctly handles time and date
+functions.  XEmacs will correctly display the entire ISO-Latin 1
+character set.
+
+   The compose key may now be used to create any character in the
+ISO-Latin 1 character set not directly available via the keyboard.  In
+order for the compose key to work it is necessary to load the file
+`x-compose.el'.  At any time while composing a character, `C-h' will
+display all valid completions and the character which would be produced.
+
+\1f
+File: lispref.info,  Node: I18N Level 3,  Next: I18N Level 4,  Prev: I18N Levels 1 and 2,  Up: Internationalization
+
+I18N Level 3
+============
+
+* Menu:
+
+* Level 3 Basics::
+* Level 3 Primitives::
+* Dynamic Messaging::
+* Domain Specification::
+* Documentation String Extraction::
+
+\1f
+File: lispref.info,  Node: Level 3 Basics,  Next: Level 3 Primitives,  Up: I18N Level 3
+
+Level 3 Basics
+--------------
+
+   XEmacs now provides alpha-level functionality for I18N Level 3.
+This means that everything necessary for full messaging is available,
+but not every file has been converted.
+
+   The two message files which have been created are `src/emacs.po' and
+`lisp/packages/mh-e.po'.  Both files need to be converted using
+`msgfmt', and the resulting `.mo' files placed in some locale's
+`LC_MESSAGES' directory.  The test "translations" in these files are
+the original messages prefixed by `TRNSLT_'.
+
+   The domain for a variable is stored on the variable's property list
+under the property name VARIABLE-DOMAIN.  The function
+`documentation-property' uses this information when translating a
+variable's documentation.
+
+\1f
+File: lispref.info,  Node: Level 3 Primitives,  Next: Dynamic Messaging,  Prev: Level 3 Basics,  Up: I18N Level 3
+
+Level 3 Primitives
+------------------
+
+ - Function: gettext string
+     This function looks up STRING in the default message domain and
+     returns its translation.  If `I18N3' was not enabled when XEmacs
+     was compiled, it just returns STRING.
+
+ - Function: dgettext domain string
+     This function looks up STRING in the specified message domain and
+     returns its translation.  If `I18N3' was not enabled when XEmacs
+     was compiled, it just returns STRING.
+
+ - Function: bind-text-domain domain pathname
+     This function associates a pathname with a message domain.  Here's
+     how the path to the message file is constructed under SunOS 5.x:
+
+          `{pathname}/{LANG}/LC_MESSAGES/{domain}.mo'
+
+     If `I18N3' was not enabled when XEmacs was compiled, this function
+     does nothing.
+
+ - Special Form: domain string
+     This function specifies the text domain used for translating
+     documentation strings and interactive prompts of a function.  For
+     example, write:
+
+          (defun foo (arg) "Doc string" (domain "emacs-foo") ...)
+
+     to specify `emacs-foo' as the text domain of the function `foo'.
+     The "call" to `domain' is actually a declaration rather than a
+     function; when actually called, `domain' just returns `nil'.
+
+ - Function: domain-of function
+     This function returns the text domain of FUNCTION; it returns
+     `nil' if it is the default domain.  If `I18N3' was not enabled
+     when XEmacs was compiled, it always returns `nil'.
+
+\1f
+File: lispref.info,  Node: Dynamic Messaging,  Next: Domain Specification,  Prev: Level 3 Primitives,  Up: I18N Level 3
+
+Dynamic Messaging
+-----------------
+
+   The `format' function has been extended to permit you to change the
+order of parameter insertion.  For example, the conversion format
+`%1$s' inserts parameter one as a string, while `%2$s' inserts
+parameter two.  This is useful when creating translations which require
+you to change the word order.
+
+\1f
+File: lispref.info,  Node: Domain Specification,  Next: Documentation String Extraction,  Prev: Dynamic Messaging,  Up: I18N Level 3
+
+Domain Specification
+--------------------
+
+   The default message domain of XEmacs is `emacs'.  For add-on
+packages, it is best to use a different domain.  For example, let us
+say we want to convert the "gorilla" package to use the domain
+`emacs-gorilla'.  To translate the message "What gorilla?", use
+`dgettext' as follows:
+
+     (dgettext "emacs-gorilla" "What gorilla?")
+
+   A function (or macro) which has a documentation string or an
+interactive prompt needs to be associated with the domain in order for
+the documentation or prompt to be translated.  This is done with the
+`domain' special form as follows:
+
+     (defun scratch (location)
+       "Scratch the specified location."
+       (domain "emacs-gorilla")
+       (interactive "sScratch: ")
+       ... )
+
+   It is most efficient to specify the domain in the first line of the
+function body, before the `interactive' form.
+
+   For variables and constants which have documentation strings,
+specify the domain after the documentation.
+
+ - Special Form: defvar symbol [value [doc-string [domain]]]
+     Example:
+          (defvar weight 250 "Weight of gorilla, in pounds." "emacs-gorilla")
+
+ - Special Form: defconst symbol [value [doc-string [domain]]]
+     Example:
+          (defconst limbs 4 "Number of limbs" "emacs-gorilla")
+
+   Autoloaded functions which are specified in `loaddefs.el' do not need
+to have a domain specification, because their documentation strings are
+extracted into the main message base.  However, for autoloaded functions
+which are specified in a separate package, use the following syntax:
+
+ - Function: autoload symbol filename &optional docstring interactive
+          macro domain
+     Example:
+          (autoload 'explore "jungle" "Explore the jungle." nil nil "emacs-gorilla")
+
+\1f
+File: lispref.info,  Node: Documentation String Extraction,  Prev: Domain Specification,  Up: I18N Level 3
+
+Documentation String Extraction
+-------------------------------
+
+   The utility `etc/make-po' scans the file `DOC' to extract
+documentation strings and creates a message file `doc.po'.  This file
+may then be inserted within `emacs.po'.
+
+   Currently, `make-po' is hard-coded to read from `DOC' and write to
+`doc.po'.  In order to extract documentation strings from an add-on
+package, first run `make-docfile' on the package to produce the `DOC'
+file.  Then run `make-po -p' with the `-p' argument to indicate that we
+are extracting documentation for an add-on package.
+
+   (The `-p' argument is a kludge to make up for a subtle difference
+between pre-loaded documentation and add-on documentation:  For add-on
+packages, the final carriage returns in the strings produced by
+`make-docfile' must be ignored.)
+
+\1f
+File: lispref.info,  Node: I18N Level 4,  Prev: I18N Level 3,  Up: Internationalization
+
+I18N Level 4
+============
+
+   The Asian-language support in XEmacs is called "MULE".  *Note MULE::.
+
+\1f
+File: lispref.info,  Node: MULE,  Next: Tips,  Prev: Internationalization,  Up: Top
+
+MULE
+****
+
+   "MULE" is the name originally given to the version of GNU Emacs
+extended for multi-lingual (and in particular Asian-language) support.
+"MULE" is short for "MUlti-Lingual Emacs".  It is an extension and
+complete rewrite of Nemacs ("Nihon Emacs" where "Nihon" is the Japanese
+word for "Japan"), which only provided support for Japanese.  XEmacs
+refers to its multi-lingual support as "MULE support" since it is based
+on "MULE".
+
+* Menu:
+
+* Internationalization Terminology::
+                        Definition of various internationalization terms.
+* Charsets::            Sets of related characters.
+* MULE Characters::     Working with characters in XEmacs/MULE.
+* Composite Characters:: Making new characters by overstriking other ones.
+* Coding Systems::      Ways of representing a string of chars using integers.
+* CCL::                 A special language for writing fast converters.
+* Category Tables::     Subdividing charsets into groups.
+
+\1f
  File: lispref.info,  Node: Internationalization Terminology,  Next: Charsets,  Up: MULE
  
  Internationalization Terminology
@@ -639,396 +1146,3 @@ charsets work.)
                                          encodings.
  * Predefined Coding Systems::         Coding systems implemented by MULE.
  
-\1f
-File: lispref.info,  Node: Coding System Types,  Next: ISO 2022,  Up: Coding Systems
-
-Coding System Types
--------------------
-
-   The coding system type determines the basic algorithm XEmacs will
-use to decode or encode a data stream.  Character encodings will be
-converted to the MULE encoding, escape sequences processed, and newline
-sequences converted to XEmacs's internal representation.  There are
-three basic classes of coding system type: no-conversion, ISO-2022, and
-special.
-
-   No conversion allows you to look at the file's internal
-representation.  Since XEmacs is basically a text editor, "no
-conversion" does convert newline conventions by default.  (Use the
-'binary coding-system if this is not desired.)
-
-   ISO 2022 (*note ISO 2022::) is the basic international standard
-regulating use of "coded character sets for the exchange of data", ie,
-text streams.  ISO 2022 contains functions that make it possible to
-encode text streams to comply with restrictions of the Internet mail
-system and de facto restrictions of most file systems (eg, use of the
-separator character in file names).  Coding systems which are not ISO
-2022 conformant can be difficult to handle.  Perhaps more important,
-they are not adaptable to multilingual information interchange, with
-the obvious exception of ISO 10646 (Unicode).  (Unicode is partially
-supported by XEmacs with the addition of the Lisp package ucs-conv.)
-
-   The special class of coding systems includes automatic detection,
-CCL (a "little language" embedded as an interpreter, useful for
-translating between variants of a single character set),
-non-ISO-2022-conformant encodings like Unicode, Shift JIS, and Big5,
-and MULE internal coding.  (NB: this list is based on XEmacs 21.2.
-Terminology may vary slightly for other versions of XEmacs and for GNU
-Emacs 20.)
-
-`no-conversion'
-     No conversion, for binary files, and a few special cases of
-     non-ISO-2022 coding systems where conversion is done by hook
-     functions (usually implemented in CCL).  On output, graphic
-     characters that are not in ASCII or Latin-1 will be replaced by a
-     `?'. (For a no-conversion-encoded buffer, these characters will
-     only be present if you explicitly insert them.)
-
-`iso2022'
-     Any ISO-2022-compliant encoding.  Among others, this includes JIS
-     (the Japanese encoding commonly used for e-mail), national
-     variants of EUC (the standard Unix encoding for Japanese and other
-     languages), and Compound Text (an encoding used in X11).  You can
-     specify more specific information about the conversion with the
-     FLAGS argument.
-
-`ucs-4'
-     ISO 10646 UCS-4 encoding.  A 31-bit fixed-width superset of
-     Unicode.
-
-`utf-8'
-     ISO 10646 UTF-8 encoding.  A "file system safe" transformation
-     format that can be used with both UCS-4 and Unicode.
-
-`undecided'
-     Automatic conversion.  XEmacs attempts to detect the coding system
-     used in the file.
-
-`shift-jis'
-     Shift-JIS (a Japanese encoding commonly used in PC operating
-     systems).
-
-`big5'
-     Big5 (the encoding commonly used for Taiwanese).
-
-`ccl'
-     The conversion is performed using a user-written pseudo-code
-     program.  CCL (Code Conversion Language) is the name of this
-     pseudo-code.  For example, CCL is used to map KOI8-R characters
-     (an encoding for Russian Cyrillic) to ISO8859-5 (the form used
-     internally by MULE).
-
-`internal'
-     Write out or read in the raw contents of the memory representing
-     the buffer's text.  This is primarily useful for debugging
-     purposes, and is only enabled when XEmacs has been compiled with
-     `DEBUG_XEMACS' set (the `--debug' configure option).  *Warning*:
-     Reading in a file using `internal' conversion can result in an
-     internal inconsistency in the memory representing a buffer's text,
-     which will produce unpredictable results and may cause XEmacs to
-     crash.  Under normal circumstances you should never use `internal'
-     conversion.
-
-\1f
-File: lispref.info,  Node: ISO 2022,  Next: EOL Conversion,  Prev: Coding System Types,  Up: Coding Systems
-
-ISO 2022
-========
-
-   This section briefly describes the ISO 2022 encoding standard.  A
-more thorough treatment is available in the original document of ISO
-2022 as well as various national standards (such as JIS X 0202).
-
-   Character sets ("charsets") are classified into the following four
-categories, according to the number of characters in the charset:
-94-charset, 96-charset, 94x94-charset, and 96x96-charset.  This means
-that although an ISO 2022 coding system may have variable width
-characters, each charset used is fixed-width (in contrast to the MULE
-character set and UTF-8, for example).
-
-   ISO 2022 provides for switching between character sets via escape
-sequences.  This switching is somewhat complicated, because ISO 2022
-provides for both legacy applications like Internet mail that accept
-only 7 significant bits in some contexts (RFC 822 headers, for example),
-and more modern "8-bit clean" applications.  It also provides for
-compact and transparent representation of languages like Japanese which
-mix ASCII and a national script (even outside of computer programs).
-
-   First, ISO 2022 codified prevailing practice by dividing the code
-space into "control" and "graphic" regions.  The code points 0x00-0x1F
-and 0x80-0x9F are reserved for "control characters", while "graphic
-characters" must be assigned to code points in the regions 0x20-0x7F and
-0xA0-0xFF.  The positions 0x20 and 0x7F are special, and under some
-circumstances must be assigned the graphic character "ASCII SPACE" and
-the control character "ASCII DEL" respectively.
-
-   The various regions are given the name C0 (0x00-0x1F), GL
-(0x20-0x7F), C1 (0x80-0x9F), and GR (0xA0-0xFF).  GL and GR stand for
-"graphic left" and "graphic right", respectively, because of the
-standard method of displaying graphic character sets in tables with the
-high byte indexing columns and the low byte indexing rows.  I don't
-find it very intuitive, but these are called "registers".
-
-   An ISO 2022-conformant encoding for a graphic character set must use
-a fixed number of bytes per character, and the values must fit into a
-single register; that is, each byte must range over either 0x20-0x7F, or
-0xA0-0xFF.  It is not allowed to extend the range of the repertoire of a
-character set by using both ranges at the same.  This is why a standard
-character set such as ISO 8859-1 is actually considered by ISO 2022 to
-be an aggregation of two character sets, ASCII and LATIN-1, and why it
-is technically incorrect to refer to ISO 8859-1 as "Latin 1".  Also, a
-single character's bytes must all be drawn from the same register; this
-is why Shift JIS (for Japanese) and Big 5 (for Chinese) are not ISO
-2022-compatible encodings.
-
-   The reason for this restriction becomes clear when you attempt to
-define an efficient, robust encoding for a language like Japanese.
-Like ISO 8859, Japanese encodings are aggregations of several character
-sets.  In practice, the vast majority of characters are drawn from the
-"JIS Roman" character set (a derivative of ASCII; it won't hurt to
-think of it as ASCII) and the JIS X 0208 standard "basic Japanese"
-character set including not only ideographic characters ("kanji") but
-syllabic Japanese characters ("kana"), a wide variety of symbols, and
-many alphabetic characters (Roman, Greek, and Cyrillic) as well.
-Although JIS X 0208 includes the whole Roman alphabet, as a 2-byte code
-it is not suited to programming; thus the inclusion of ASCII in the
-standard Japanese encodings.
-
-   For normal Japanese text such as in newspapers, a broad repertoire of
-approximately 3000 characters is used.  Evidently this won't fit into
-one byte; two must be used.  But much of the text processed by Japanese
-computers is computer source code, nearly all of which is ASCII.  A not
-insignificant portion of ordinary text is English (as such or as
-borrowed Japanese vocabulary) or other languages which can represented
-at least approximately in ASCII, as well.  It seems reasonable then to
-represent ASCII in one byte, and JIS X 0208 in two.  And this is exactly
-what the Extended Unix Code for Japanese (EUC-JP) does.  ASCII is
-invoked to the GL register, and JIS X 0208 is invoked to the GR
-register.  Thus, each byte can be tested for its character set by
-looking at the high bit; if set, it is Japanese, if clear, it is ASCII.
-Furthermore, since control characters like newline can never be part of
-a graphic character, even in the case of corruption in transmission the
-stream will be resynchronized at every line break, on the order of 60-80
-bytes.  This coding system requires no escape sequences or special
-control codes to represent 99.9% of all Japanese text.
-
-   Note carefully the distinction between the character sets (ASCII and
-JIS X 0208), the encoding (EUC-JP), and the coding system (ISO 2022).
-The JIS X 0208 character set is used in three different encodings for
-Japanese, but in ISO-2022-JP it is invoked into GL (so the high bit is
-always clear), in EUC-JP it is invoked into GR (setting the high bit in
-the process), and in Shift JIS the high bit may be set or reset, and the
-significant bits are shifted within the 16-bit character so that the two
-main character sets can coexist with a third (the "halfwidth katakana"
-of JIS X 0201).  As the name implies, the ISO-2022-JP encoding is also a
-version of the ISO-2022 coding system.
-
-   In order to systematically treat subsidiary character sets (like the
-"halfwidth katakana" already mentioned, and the "supplementary kanji" of
-JIS X 0212), four further registers are defined: G0, G1, G2, and G3.
-Unlike GL and GR, they are not logically distinguished by internal
-format.  Instead, the process of "invocation" mentioned earlier is
-broken into two steps: first, a character set is "designated" to one of
-the registers G0-G3 by use of an "escape sequence" of the form:
-
-             ESC [I] I F
-
-   where I is an intermediate character or characters in the range 0x20
-- 0x3F, and F, from the range 0x30-0x7Fm is the final character
-identifying this charset.  (Final characters in the range 0x30-0x3F are
-reserved for private use and will never have a publically registered
-meaning.)
-
-   Then that register is "invoked" to either GL or GR, either
-automatically (designations to G0 normally involve invocation to GL as
-well), or by use of shifting (affecting only the following character in
-the data stream) or locking (effective until the next designation or
-locking) control sequences.  An encoding conformant to ISO 2022 is
-typically defined by designating the initial contents of the G0-G3
-registers, specifying an 7 or 8 bit environment, and specifying whether
-further designations will be recognized.
-
-   Some examples of character sets and the registered final characters
-F used to designate them:
-
-94-charset
-     ASCII (B), left (J) and right (I) half of JIS X 0201, ...
-
-96-charset
-     Latin-1 (A), Latin-2 (B), Latin-3 (C), ...
-
-94x94-charset
-     GB2312 (A), JIS X 0208 (B), KSC5601 (C), ...
-
-96x96-charset
-     none for the moment
-
-   The meanings of the various characters in these sequences, where not
-specified by the ISO 2022 standard (such as the ESC character), are
-assigned by "ECMA", the European Computer Manufacturers Association.
-
-   The meanings of the intermediate characters are:
-
-             $ [0x24]: indicate charset of dimension 2 (94x94 or 96x96).
-             ( [0x28]: designate to G0 a 94-charset whose final byte is F.
-             ) [0x29]: designate to G1 a 94-charset whose final byte is F.
-             * [0x2A]: designate to G2 a 94-charset whose final byte is F.
-             + [0x2B]: designate to G3 a 94-charset whose final byte is F.
-             , [0x2C]: designate to G0 a 96-charset whose final byte is F.
-             - [0x2D]: designate to G1 a 96-charset whose final byte is F.
-             . [0x2E]: designate to G2 a 96-charset whose final byte is F.
-             / [0x2F]: designate to G3 a 96-charset whose final byte is F.
-
-   The comma may be used in files read and written only by MULE, as a
-MULE extension, but this is illegal in ISO 2022.  (The reason is that
-in ISO 2022 G0 must be a 94-member character set, with 0x20 assigned
-the value SPACE, and 0x7F assigned the value DEL.)
-
-   Here are examples of designations:
-
-             ESC ( B :              designate to G0 ASCII
-             ESC - A :              designate to G1 Latin-1
-             ESC $ ( A or ESC $ A : designate to G0 GB2312
-             ESC $ ( B or ESC $ B : designate to G0 JISX0208
-             ESC $ ) C :            designate to G1 KSC5601
-
-   (The short forms used to designate GB2312 and JIS X 0208 are for
-backwards compatibility; the long forms are preferred.)
-
-   To use a charset designated to G2 or G3, and to use a charset
-designated to G1 in a 7-bit environment, you must explicitly invoke G1,
-G2, or G3 into GL.  There are two types of invocation, Locking Shift
-(forever) and Single Shift (one character only).
-
-   Locking Shift is done as follows:
-
-             LS0 or SI (0x0F): invoke G0 into GL
-             LS1 or SO (0x0E): invoke G1 into GL
-             LS2:  invoke G2 into GL
-             LS3:  invoke G3 into GL
-             LS1R: invoke G1 into GR
-             LS2R: invoke G2 into GR
-             LS3R: invoke G3 into GR
-
-   Single Shift is done as follows:
-
-             SS2 or ESC N: invoke G2 into GL
-             SS3 or ESC O: invoke G3 into GL
-
-   The shift functions (such as LS1R and SS3) are represented by control
-characters (from C1) in 8-bit environments and by escape sequences in
-7-bit environments.
-
-   (#### Ben says: I think the above is slightly incorrect.  It appears
-that SS2 invokes G2 into GR and SS3 invokes G3 into GR, whereas ESC N
-and ESC O behave as indicated.  The above definitions will not parse
-EUC-encoded text correctly, and it looks like the code in mule-coding.c
-has similar problems.)
-
-   Evidently there are a lot of ISO-2022-compliant ways of encoding
-multilingual text.  Many coding systems in actual use, such as X11's
-Compound Text, Japanese JUNET code, and so-called EUC (Extended UNIX
-Code), are variants of ISO 2022.
-
-   In MULE, we characterize a version of ISO 2022 by the following
-attributes:
-
-  1. The character sets initially designated to G0 through G3.
-
-  2. Whether short form designations are allowed for Japanese and
-     Chinese.
-
-  3. Whether ASCII should be designated to G0 before control characters.
-
-  4. Whether ASCII should be designated to G0 at the end of line.
-
-  5. 7-bit environment or 8-bit environment.
-
-  6. Whether Locking Shifts are used or not.
-
-  7. Whether to use ASCII or the variant JIS X 0201-1976-Roman.
-
-  8. Whether to use JIS X 0208-1983 or the older version JIS X
-     0208-1976.
-
-   (The last two are only for Japanese.)
-
-   By specifying these attributes, you can create any variant of ISO
-2022.
-
-   Here are several examples:
-
-     ISO-2022-JP -- Coding system used in Japanese email (RFC 1468).
-             1. G0 <- ASCII, G1..3 <- never used.
-             2. Yes.
-             3. Yes.
-             4. Yes.
-             5. 7-bit environment.
-             6. No.
-             7. Use ASCII.
-             8. Use JIS X 0208-1983.
-     
-     ctext -- X11 Compound Text
-             1. G0 <- ASCII, G1 <- Latin-1, G2,3 <- never used.
-             2. No.
-             3. No.
-             4. Yes.
-             5. 8-bit environment.
-             6. No.
-             7. Use ASCII.
-             8. Use JIS X 0208-1983.
-     
-     euc-china -- Chinese EUC.  Often called the "GB encoding", but that is
-     technically incorrect.
-             1. G0 <- ASCII, G1 <- GB 2312, G2,3 <- never used.
-             2. No.
-             3. Yes.
-             4. Yes.
-             5. 8-bit environment.
-             6. No.
-             7. Use ASCII.
-             8. Use JIS X 0208-1983.
-     
-     ISO-2022-KR -- Coding system used in Korean email.
-             1. G0 <- ASCII, G1 <- KSC 5601, G2,3 <- never used.
-             2. No.
-             3. Yes.
-             4. Yes.
-             5. 7-bit environment.
-             6. Yes.
-             7. Use ASCII.
-             8. Use JIS X 0208-1983.
-
-   MULE creates all of these coding systems by default.
-
-\1f
-File: lispref.info,  Node: EOL Conversion,  Next: Coding System Properties,  Prev: ISO 2022,  Up: Coding Systems
-
-EOL Conversion
---------------
-
-`nil'
-     Automatically detect the end-of-line type (LF, CRLF, or CR).  Also
-     generate subsidiary coding systems named `NAME-unix', `NAME-dos',
-     and `NAME-mac', that are identical to this coding system but have
-     an EOL-TYPE value of `lf', `crlf', and `cr', respectively.
-
-`lf'
-     The end of a line is marked externally using ASCII LF.  Since this
-     is also the way that XEmacs represents an end-of-line internally,
-     specifying this option results in no end-of-line conversion.  This
-     is the standard format for Unix text files.
-
-`crlf'
-     The end of a line is marked externally using ASCII CRLF.  This is
-     the standard format for MS-DOS text files.
-
-`cr'
-     The end of a line is marked externally using ASCII CR.  This is the
-     standard format for Macintosh text files.
-
-`t'
-     Automatically detect the end-of-line type but do not generate
-     subsidiary coding systems.  (This value is converted to `nil' when
-     stored internally, and `coding-system-property' will return `nil'.)
-