New file.

author tomo <tomo>

Thu, 9 Sep 1999 06:08:42 +0000 (06:08 +0000)

committer tomo <tomo>

Thu, 9 Sep 1999 06:08:42 +0000 (06:08 +0000)
author tomo <tomo>
Thu, 9 Sep 1999 06:08:42 +0000 (06:08 +0000)
committer tomo <tomo>
Thu, 9 Sep 1999 06:08:42 +0000 (06:08 +0000)
diff --git a/src/text-coding.c b/src/text-coding.c

new file mode 100644 (file)

index 0000000..f1dd835
--- /dev/null
+++ b/src/text-coding.c
@@ -0,0 +1,5888 @@
+/* Code conversion functions.
+   Copyright (C) 1991, 1995 Free Software Foundation, Inc.
+   Copyright (C) 1995 Sun Microsystems, Inc.
+
+This file is part of XEmacs.
+
+XEmacs is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+XEmacs is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with XEmacs; see the file COPYING.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* Synched up with: Mule 2.3.   Not in FSF. */
+
+/* Rewritten by Ben Wing <ben@xemacs.org>. */
+
+#include <config.h>
+#include "lisp.h"
+
+#include "buffer.h"
+#include "elhash.h"
+#include "insdel.h"
+#include "lstream.h"
+#ifdef MULE
+#include "mule-ccl.h"
+#include "chartab.h"
+#endif
+#include "file-coding.h"
+
+Lisp_Object Qbuffer_file_coding_system, Qcoding_system_error;
+
+Lisp_Object Vkeyboard_coding_system;
+Lisp_Object Vterminal_coding_system;
+Lisp_Object Vcoding_system_for_read;
+Lisp_Object Vcoding_system_for_write;
+Lisp_Object Vfile_name_coding_system;
+
+/* Table of symbols identifying each coding category. */
+Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1];
+
+/* Coding system currently associated with each coding category. */
+Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1];
+
+/* Table of all coding categories in decreasing order of priority.
+   This describes a permutation of the possible coding categories. */
+int coding_category_by_priority[CODING_CATEGORY_LAST + 1];
+
+Lisp_Object Qcoding_system_p;
+
+Lisp_Object Qraw_text, Qno_conversion, Qccl, Qiso2022;
+/* Qinternal in general.c */
+
+Lisp_Object Qmnemonic, Qeol_type;
+Lisp_Object Qcr, Qcrlf, Qlf;
+Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf;
+Lisp_Object Qpost_read_conversion;
+Lisp_Object Qpre_write_conversion;
+
+#ifdef MULE
+Lisp_Object Qucs4, Qutf8;
+Lisp_Object Qbig5, Qshift_jis;
+Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3;
+Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
+Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output;
+Lisp_Object Qno_iso6429;
+Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion;
+Lisp_Object Qctext, Qescape_quoted;
+Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
+#endif
+Lisp_Object Qencode, Qdecode;
+
+Lisp_Object Vcoding_system_hash_table;
+
+int enable_multibyte_characters;
+
+#ifdef MULE
+/* Additional information used by the ISO2022 decoder and detector. */
+struct iso2022_decoder
+{
+  /* CHARSET holds the character sets currently assigned to the G0
+     through G3 variables.  It is initialized from the array
+     INITIAL_CHARSET in CODESYS. */
+  Lisp_Object charset[4];
+
+  /* Which registers are currently invoked into the left (GL) and
+     right (GR) halves of the 8-bit encoding space? */
+  int register_left, register_right;
+
+  /* ISO_ESC holds a value indicating part of an escape sequence
+     that has already been seen. */
+  enum iso_esc_flag esc;
+
+  /* This records the bytes we've seen so far in an escape sequence,
+     in case the sequence is invalid (we spit out the bytes unchanged). */
+  unsigned char esc_bytes[8];
+
+  /* Index for next byte to store in ISO escape sequence. */
+  int esc_bytes_index;
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  /* Stuff seen so far when composing a string. */
+  unsigned_char_dynarr *composite_chars;
+#endif
+
+  /* If we saw an invalid designation sequence for a particular
+     register, we flag it here and switch to ASCII.  The next time we
+     see a valid designation for this register, we turn off the flag
+     and do the designation normally, but pretend the sequence was
+     invalid.  The effect of all this is that (most of the time) the
+     escape sequences for both the switch to the unknown charset, and
+     the switch back to the known charset, get inserted literally into
+     the buffer and saved out as such.  The hope is that we can
+     preserve the escape sequences so that the resulting written out
+     file makes sense.  If we don't do any of this, the designation
+     to the invalid charset will be preserved but that switch back
+     to the known charset will probably get eaten because it was
+     the same charset that was already present in the register. */
+  unsigned char invalid_designated[4];
+
+  /* We try to do similar things as above for direction-switching
+     sequences.  If we encountered a direction switch while an
+     invalid designation was present, or an invalid designation
+     just after a direction switch (i.e. no valid designation
+     encountered yet), we insert the direction-switch escape
+     sequence literally into the output stream, and later on
+     insert the corresponding direction-restoring escape sequence
+     literally also. */
+  unsigned int switched_dir_and_no_valid_charset_yet :1;
+  unsigned int invalid_switch_dir :1;
+
+  /* Tells the decoder to output the escape sequence literally
+     even though it was valid.  Used in the games we play to
+     avoid lossage when we encounter invalid designations. */
+  unsigned int output_literally :1;
+  /* We encountered a direction switch followed by an invalid
+     designation.  We didn't output the direction switch
+     literally because we didn't know about the invalid designation;
+     but we have to do so now. */
+  unsigned int output_direction_sequence :1;
+};
+#endif /* MULE */
+EXFUN (Fcopy_coding_system, 2);
+#ifdef MULE
+struct detection_state;
+
+static void
+text_encode_generic (Lstream *encoding, CONST unsigned char *src,
+                    unsigned_char_dynarr *dst, unsigned int n);
+
+static int detect_coding_sjis (struct detection_state *st,
+                              CONST unsigned char *src,
+                              unsigned int n);
+static void decode_coding_sjis (Lstream *decoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst,
+                               unsigned int n);
+static void encode_coding_sjis (Lstream *encoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst,
+                               unsigned int n);
+static int detect_coding_big5 (struct detection_state *st,
+                              CONST unsigned char *src,
+                              unsigned int n);
+static void decode_coding_big5 (Lstream *decoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
+static void encode_coding_big5 (Lstream *encoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
+static int detect_coding_ucs4 (struct detection_state *st,
+                              CONST unsigned char *src,
+                              unsigned int n);
+static void decode_coding_ucs4 (Lstream *decoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
+static void encode_coding_ucs4 (Lstream *encoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
+static int detect_coding_utf8 (struct detection_state *st,
+                              CONST unsigned char *src,
+                              unsigned int n);
+static void decode_coding_utf8 (Lstream *decoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
+void char_encode_utf8 (struct encoding_stream *str, Emchar c,
+                      unsigned_char_dynarr *dst, unsigned int *flags);
+void char_finish_utf8 (struct encoding_stream *str,
+                      unsigned_char_dynarr *dst, unsigned int *flags);
+
+static int postprocess_iso2022_mask (int mask);
+static void reset_iso2022 (Lisp_Object coding_system,
+                          struct iso2022_decoder *iso);
+static int detect_coding_iso2022 (struct detection_state *st,
+                                 CONST unsigned char *src,
+                                 unsigned int n);
+static void decode_coding_iso2022 (Lstream *decoding,
+                                  CONST unsigned char *src,
+                                  unsigned_char_dynarr *dst, unsigned int n);
+void char_encode_iso2022 (struct encoding_stream *str, Emchar c,
+                         unsigned_char_dynarr *dst, unsigned int *flags);
+void char_finish_iso2022 (struct encoding_stream *str,
+                         unsigned_char_dynarr *dst, unsigned int *flags);
+#endif /* MULE */
+static void decode_coding_no_conversion (Lstream *decoding,
+                                        CONST unsigned char *src,
+                                        unsigned_char_dynarr *dst,
+                                        unsigned int n);
+static void encode_coding_no_conversion (Lstream *encoding,
+                                        CONST unsigned char *src,
+                                        unsigned_char_dynarr *dst,
+                                        unsigned int n);
+static void mule_decode (Lstream *decoding, CONST unsigned char *src,
+                        unsigned_char_dynarr *dst, unsigned int n);
+static void mule_encode (Lstream *encoding, CONST unsigned char *src,
+                        unsigned_char_dynarr *dst, unsigned int n);
+
+typedef struct codesys_prop codesys_prop;
+struct codesys_prop
+{
+  Lisp_Object sym;
+  int prop_type;
+};
+
+typedef struct
+{
+  Dynarr_declare (codesys_prop);
+} codesys_prop_dynarr;
+
+codesys_prop_dynarr *the_codesys_prop_dynarr;
+
+enum codesys_prop_enum
+{
+  CODESYS_PROP_ALL_OK,
+  CODESYS_PROP_ISO2022,
+  CODESYS_PROP_CCL
+};
+
+\f
+/************************************************************************/
+/*                       Coding system functions                        */
+/************************************************************************/
+
+static Lisp_Object mark_coding_system (Lisp_Object, void (*) (Lisp_Object));
+static void print_coding_system (Lisp_Object, Lisp_Object, int);
+static void finalize_coding_system (void *header, int for_disksave);
+
+#ifdef MULE
+static const struct lrecord_description ccs_description_1[] = {
+  { XD_LISP_OBJECT, offsetof(charset_conversion_spec, from_charset), 2 },
+  { XD_END }
+};
+
+static const struct struct_description ccs_description = {
+  sizeof(charset_conversion_spec),
+  ccs_description_1
+};
+  
+static const struct lrecord_description ccsd_description_1[] = {
+  XD_DYNARR_DESC(charset_conversion_spec_dynarr, &ccs_description),
+  { XD_END }
+};
+
+static const struct struct_description ccsd_description = {
+  sizeof(charset_conversion_spec_dynarr),
+  ccsd_description_1
+};
+#endif
+
+static const struct lrecord_description coding_system_description[] = {
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, name), 2 },
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, mnemonic), 3 },
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, eol_lf), 3 },
+#ifdef MULE
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, iso2022.initial_charset), 4 },
+  { XD_STRUCT_PTR,  offsetof(struct Lisp_Coding_System, iso2022.input_conv),  1, &ccsd_description },
+  { XD_STRUCT_PTR,  offsetof(struct Lisp_Coding_System, iso2022.output_conv), 1, &ccsd_description },
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, ccl.decode), 2 },
+#endif
+  { XD_END }
+};
+
+DEFINE_LRECORD_IMPLEMENTATION ("coding-system", coding_system,
+                              mark_coding_system, print_coding_system,
+                              finalize_coding_system,
+                              0, 0, coding_system_description,
+                              struct Lisp_Coding_System);
+
+static Lisp_Object
+mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
+{
+  Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
+
+  markobj (CODING_SYSTEM_NAME (codesys));
+  markobj (CODING_SYSTEM_DOC_STRING (codesys));
+  markobj (CODING_SYSTEM_MNEMONIC (codesys));
+  markobj (CODING_SYSTEM_EOL_LF (codesys));
+  markobj (CODING_SYSTEM_EOL_CRLF (codesys));
+  markobj (CODING_SYSTEM_EOL_CR (codesys));
+
+  switch (CODING_SYSTEM_TYPE (codesys))
+    {
+#ifdef MULE
+      int i;
+    case CODESYS_ISO2022:
+      for (i = 0; i < 4; i++)
+       markobj (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
+      if (codesys->iso2022.input_conv)
+       {
+         for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++)
+           {
+             struct charset_conversion_spec *ccs =
+               Dynarr_atp (codesys->iso2022.input_conv, i);
+             markobj (ccs->from_charset);
+             markobj (ccs->to_charset);
+           }
+       }
+      if (codesys->iso2022.output_conv)
+       {
+         for (i = 0; i < Dynarr_length (codesys->iso2022.output_conv); i++)
+           {
+             struct charset_conversion_spec *ccs =
+               Dynarr_atp (codesys->iso2022.output_conv, i);
+             markobj (ccs->from_charset);
+             markobj (ccs->to_charset);
+           }
+       }
+      break;
+
+    case CODESYS_CCL:
+      markobj (CODING_SYSTEM_CCL_DECODE (codesys));
+      markobj (CODING_SYSTEM_CCL_ENCODE (codesys));
+      break;
+#endif /* MULE */
+    default:
+      break;
+    }
+
+  markobj (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
+  return CODING_SYSTEM_POST_READ_CONVERSION (codesys);
+}
+
+static void
+print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
+                    int escapeflag)
+{
+  Lisp_Coding_System *c = XCODING_SYSTEM (obj);
+  if (print_readably)
+    error ("printing unreadable object #<coding_system 0x%x>",
+          c->header.uid);
+
+  write_c_string ("#<coding_system ", printcharfun);
+  print_internal (c->name, printcharfun, 1);
+  write_c_string (">", printcharfun);
+}
+
+static void
+finalize_coding_system (void *header, int for_disksave)
+{
+  Lisp_Coding_System *c = (Lisp_Coding_System *) header;
+  /* Since coding systems never go away, this function is not
+     necessary.  But it would be necessary if we changed things
+     so that coding systems could go away. */
+  if (!for_disksave) /* see comment in lstream.c */
+    {
+      switch (CODING_SYSTEM_TYPE (c))
+       {
+#ifdef MULE
+       case CODESYS_ISO2022:
+         if (c->iso2022.input_conv)
+           {
+             Dynarr_free (c->iso2022.input_conv);
+             c->iso2022.input_conv = 0;
+           }
+         if (c->iso2022.output_conv)
+           {
+             Dynarr_free (c->iso2022.output_conv);
+             c->iso2022.output_conv = 0;
+           }
+         break;
+#endif /* MULE */
+       default:
+         break;
+       }
+    }
+}
+
+static enum eol_type
+symbol_to_eol_type (Lisp_Object symbol)
+{
+  CHECK_SYMBOL (symbol);
+  if (NILP (symbol))      return EOL_AUTODETECT;
+  if (EQ (symbol, Qlf))   return EOL_LF;
+  if (EQ (symbol, Qcrlf)) return EOL_CRLF;
+  if (EQ (symbol, Qcr))   return EOL_CR;
+
+  signal_simple_error ("Unrecognized eol type", symbol);
+  return EOL_AUTODETECT; /* not reached */
+}
+
+static Lisp_Object
+eol_type_to_symbol (enum eol_type type)
+{
+  switch (type)
+    {
+    default: abort ();
+    case EOL_LF:         return Qlf;
+    case EOL_CRLF:       return Qcrlf;
+    case EOL_CR:         return Qcr;
+    case EOL_AUTODETECT: return Qnil;
+    }
+}
+
+static void
+setup_eol_coding_systems (Lisp_Coding_System *codesys)
+{
+  Lisp_Object codesys_obj;
+  int len = string_length (XSYMBOL (CODING_SYSTEM_NAME (codesys))->name);
+  char *codesys_name = (char *) alloca (len + 7);
+  int mlen = -1;
+  char *codesys_mnemonic=0;
+
+  Lisp_Object codesys_name_sym, sub_codesys_obj;
+
+  /* kludge */
+
+  XSETCODING_SYSTEM (codesys_obj, codesys);
+
+  memcpy (codesys_name,
+         string_data (XSYMBOL (CODING_SYSTEM_NAME (codesys))->name), len);
+
+  if (STRINGP (CODING_SYSTEM_MNEMONIC (codesys)))
+    {
+      mlen = XSTRING_LENGTH (CODING_SYSTEM_MNEMONIC (codesys));
+      codesys_mnemonic = (char *) alloca (mlen + 7);
+      memcpy (codesys_mnemonic,
+             XSTRING_DATA (CODING_SYSTEM_MNEMONIC (codesys)), mlen);
+    }
+
+#define DEFINE_SUB_CODESYS(op_sys, op_sys_abbr, Type) do {                     \
+  strcpy (codesys_name + len, "-" op_sys);                                     \
+  if (mlen != -1)                                                              \
+    strcpy (codesys_mnemonic + mlen, op_sys_abbr);                             \
+  codesys_name_sym = intern (codesys_name);                                    \
+  sub_codesys_obj = Fcopy_coding_system (codesys_obj, codesys_name_sym);       \
+  XCODING_SYSTEM_EOL_TYPE (sub_codesys_obj) = Type;                            \
+  if (mlen != -1)                                                              \
+    XCODING_SYSTEM_MNEMONIC(sub_codesys_obj) =                                 \
+      build_string (codesys_mnemonic);                                         \
+  CODING_SYSTEM_##Type (codesys) = sub_codesys_obj;                            \
+} while (0)
+
+  DEFINE_SUB_CODESYS("unix", "", EOL_LF);
+  DEFINE_SUB_CODESYS("dos",  ":T", EOL_CRLF);
+  DEFINE_SUB_CODESYS("mac",  ":t", EOL_CR);
+}
+
+DEFUN ("coding-system-p", Fcoding_system_p, 1, 1, 0, /*
+Return t if OBJECT is a coding system.
+A coding system is an object that defines how text containing multiple
+character sets is encoded into a stream of (typically 8-bit) bytes.
+The coding system is used to decode the stream into a series of
+characters (which may be from multiple charsets) when the text is read
+from a file or process, and is used to encode the text back into the
+same format when it is written out to a file or process.
+
+For example, many ISO2022-compliant coding systems (such as Compound
+Text, which is used for inter-client data under the X Window System)
+use escape sequences to switch between different charsets -- Japanese
+Kanji, for example, is invoked with "ESC $ ( B"; ASCII is invoked
+with "ESC ( B"; and Cyrillic is invoked with "ESC - L".  See
+`make-coding-system' for more information.
+
+Coding systems are normally identified using a symbol, and the
+symbol is accepted in place of the actual coding system object whenever
+a coding system is called for. (This is similar to how faces work.)
+*/
+       (object))
+{
+  return CODING_SYSTEMP (object) ? Qt : Qnil;
+}
+
+DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /*
+Retrieve the coding system of the given name.
+
+If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply
+returned.  Otherwise, CODING-SYSTEM-OR-NAME should be a symbol.
+If there is no such coding system, nil is returned.  Otherwise the
+associated coding system object is returned.
+*/
+       (coding_system_or_name))
+{
+  if (CODING_SYSTEMP (coding_system_or_name))
+    return coding_system_or_name;
+
+  if (NILP (coding_system_or_name))
+    coding_system_or_name = Qbinary;
+  else
+    CHECK_SYMBOL (coding_system_or_name);
+
+  return Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
+}
+
+DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
+Retrieve the coding system of the given name.
+Same as `find-coding-system' except that if there is no such
+coding system, an error is signaled instead of returning nil.
+*/
+       (name))
+{
+  Lisp_Object coding_system = Ffind_coding_system (name);
+
+  if (NILP (coding_system))
+    signal_simple_error ("No such coding system", name);
+  return coding_system;
+}
+
+/* We store the coding systems in hash tables with the names as the key and the
+   actual coding system object as the value.  Occasionally we need to use them
+   in a list format.  These routines provide us with that. */
+struct coding_system_list_closure
+{
+  Lisp_Object *coding_system_list;
+};
+
+static int
+add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value,
+                                 void *coding_system_list_closure)
+{
+  /* This function can GC */
+  struct coding_system_list_closure *cscl =
+    (struct coding_system_list_closure *) coding_system_list_closure;
+  Lisp_Object *coding_system_list = cscl->coding_system_list;
+
+  *coding_system_list = Fcons (XCODING_SYSTEM (value)->name,
+                              *coding_system_list);
+  return 0;
+}
+
+DEFUN ("coding-system-list", Fcoding_system_list, 0, 0, 0, /*
+Return a list of the names of all defined coding systems.
+*/
+       ())
+{
+  Lisp_Object coding_system_list = Qnil;
+  struct gcpro gcpro1;
+  struct coding_system_list_closure coding_system_list_closure;
+
+  GCPRO1 (coding_system_list);
+  coding_system_list_closure.coding_system_list = &coding_system_list;
+  elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table,
+                &coding_system_list_closure);
+  UNGCPRO;
+
+  return coding_system_list;
+}
+
+DEFUN ("coding-system-name", Fcoding_system_name, 1, 1, 0, /*
+Return the name of the given coding system.
+*/
+       (coding_system))
+{
+  coding_system = Fget_coding_system (coding_system);
+  return XCODING_SYSTEM_NAME (coding_system);
+}
+
+static Lisp_Coding_System *
+allocate_coding_system (enum coding_system_type type, Lisp_Object name)
+{
+  Lisp_Coding_System *codesys =
+    alloc_lcrecord_type (Lisp_Coding_System, &lrecord_coding_system);
+
+  zero_lcrecord (codesys);
+  CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = Qnil;
+  CODING_SYSTEM_POST_READ_CONVERSION (codesys) = Qnil;
+  CODING_SYSTEM_EOL_TYPE (codesys) = EOL_AUTODETECT;
+  CODING_SYSTEM_EOL_CRLF (codesys) = Qnil;
+  CODING_SYSTEM_EOL_CR   (codesys) = Qnil;
+  CODING_SYSTEM_EOL_LF   (codesys) = Qnil;
+  CODING_SYSTEM_TYPE     (codesys) = type;
+  CODING_SYSTEM_MNEMONIC (codesys) = Qnil;
+#ifdef MULE
+  if (type == CODESYS_ISO2022)
+    {
+      int i;
+      for (i = 0; i < 4; i++)
+       CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i) = Qnil;
+    }
+  else if (type == CODESYS_CCL)
+    {
+      CODING_SYSTEM_CCL_DECODE (codesys) = Qnil;
+      CODING_SYSTEM_CCL_ENCODE (codesys) = Qnil;
+    }
+#endif /* MULE */
+  CODING_SYSTEM_NAME (codesys) = name;
+
+  return codesys;
+}
+
+#ifdef MULE
+/* Given a list of charset conversion specs as specified in a Lisp
+   program, parse it into STORE_HERE. */
+
+static void
+parse_charset_conversion_specs (charset_conversion_spec_dynarr *store_here,
+                               Lisp_Object spec_list)
+{
+  Lisp_Object rest;
+
+  EXTERNAL_LIST_LOOP (rest, spec_list)
+    {
+      Lisp_Object car = XCAR (rest);
+      Lisp_Object from, to;
+      struct charset_conversion_spec spec;
+
+      if (!CONSP (car) || !CONSP (XCDR (car)) || !NILP (XCDR (XCDR (car))))
+       signal_simple_error ("Invalid charset conversion spec", car);
+      from = Fget_charset (XCAR (car));
+      to = Fget_charset (XCAR (XCDR (car)));
+      if (XCHARSET_TYPE (from) != XCHARSET_TYPE (to))
+       signal_simple_error_2
+         ("Attempted conversion between different charset types",
+          from, to);
+      spec.from_charset = from;
+      spec.to_charset = to;
+
+      Dynarr_add (store_here, spec);
+    }
+}
+
+/* Given a dynarr LOAD_HERE of internally-stored charset conversion
+   specs, return the equivalent as the Lisp programmer would see it.
+
+   If LOAD_HERE is 0, return Qnil. */
+
+static Lisp_Object
+unparse_charset_conversion_specs (charset_conversion_spec_dynarr *load_here)
+{
+  int i;
+  Lisp_Object result;
+
+  if (!load_here)
+    return Qnil;
+  for (i = 0, result = Qnil; i < Dynarr_length (load_here); i++)
+    {
+      struct charset_conversion_spec *ccs = Dynarr_atp (load_here, i);
+      result = Fcons (list2 (ccs->from_charset, ccs->to_charset), result);
+    }
+
+  return Fnreverse (result);
+}
+
+#endif /* MULE */
+
+DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /*
+Register symbol NAME as a coding system.
+
+TYPE describes the conversion method used and should be one of
+
+nil or 'undecided
+     Automatic conversion.  XEmacs attempts to detect the coding system
+     used in the file.
+'no-conversion
+     No conversion.  Use this for binary files and such.  On output,
+     graphic characters that are not in ASCII or Latin-1 will be
+     replaced by a ?. (For a no-conversion-encoded buffer, these
+     characters will only be present if you explicitly insert them.)
+'shift-jis
+     Shift-JIS (a Japanese encoding commonly used in PC operating systems).
+'ucs-4
+     ISO 10646 UCS-4 encoding.
+'utf-8
+     ISO 10646 UTF-8 encoding.
+'iso2022
+     Any ISO2022-compliant encoding.  Among other things, this includes
+     JIS (the Japanese encoding commonly used for e-mail), EUC (the
+     standard Unix encoding for Japanese and other languages), and
+     Compound Text (the encoding used in X11).  You can specify more
+     specific information about the conversion with the FLAGS argument.
+'big5
+     Big5 (the encoding commonly used for Taiwanese).
+'ccl
+     The conversion is performed using a user-written pseudo-code
+     program.  CCL (Code Conversion Language) is the name of this
+     pseudo-code.
+'internal
+     Write out or read in the raw contents of the memory representing
+     the buffer's text.  This is primarily useful for debugging
+     purposes, and is only enabled when XEmacs has been compiled with
+     DEBUG_XEMACS defined (via the --debug configure option).
+     WARNING: Reading in a file using 'internal conversion can result
+     in an internal inconsistency in the memory representing a
+     buffer's text, which will produce unpredictable results and may
+     cause XEmacs to crash.  Under normal circumstances you should
+     never use 'internal conversion.
+
+DOC-STRING is a string describing the coding system.
+
+PROPS is a property list, describing the specific nature of the
+character set.  Recognized properties are:
+
+'mnemonic
+     String to be displayed in the modeline when this coding system is
+     active.
+
+'eol-type
+     End-of-line conversion to be used.  It should be one of
+
+       nil
+               Automatically detect the end-of-line type (LF, CRLF,
+               or CR).  Also generate subsidiary coding systems named
+               `NAME-unix', `NAME-dos', and `NAME-mac', that are
+               identical to this coding system but have an EOL-TYPE
+               value of 'lf, 'crlf, and 'cr, respectively.
+       'lf
+               The end of a line is marked externally using ASCII LF.
+               Since this is also the way that XEmacs represents an
+               end-of-line internally, specifying this option results
+               in no end-of-line conversion.  This is the standard
+               format for Unix text files.
+       'crlf
+               The end of a line is marked externally using ASCII
+               CRLF.  This is the standard format for MS-DOS text
+               files.
+       'cr
+               The end of a line is marked externally using ASCII CR.
+               This is the standard format for Macintosh text files.
+       t
+               Automatically detect the end-of-line type but do not
+               generate subsidiary coding systems.  (This value is
+               converted to nil when stored internally, and
+               `coding-system-property' will return nil.)
+
+'post-read-conversion
+     Function called after a file has been read in, to perform the
+     decoding.  Called with two arguments, BEG and END, denoting
+     a region of the current buffer to be decoded.
+
+'pre-write-conversion
+     Function called before a file is written out, to perform the
+     encoding.  Called with two arguments, BEG and END, denoting
+     a region of the current buffer to be encoded.
+
+
+The following additional properties are recognized if TYPE is 'iso2022:
+
+'charset-g0
+'charset-g1
+'charset-g2
+'charset-g3
+     The character set initially designated to the G0 - G3 registers.
+     The value should be one of
+
+          -- A charset object (designate that character set)
+         -- nil (do not ever use this register)
+         -- t (no character set is initially designated to
+               the register, but may be later on; this automatically
+               sets the corresponding `force-g*-on-output' property)
+
+'force-g0-on-output
+'force-g1-on-output
+'force-g2-on-output
+'force-g2-on-output
+     If non-nil, send an explicit designation sequence on output before
+     using the specified register.
+
+'short
+     If non-nil, use the short forms "ESC $ @", "ESC $ A", and
+     "ESC $ B" on output in place of the full designation sequences
+     "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B".
+
+'no-ascii-eol
+     If non-nil, don't designate ASCII to G0 at each end of line on output.
+     Setting this to non-nil also suppresses other state-resetting that
+     normally happens at the end of a line.
+
+'no-ascii-cntl
+     If non-nil, don't designate ASCII to G0 before control chars on output.
+
+'seven
+     If non-nil, use 7-bit environment on output.  Otherwise, use 8-bit
+     environment.
+
+'lock-shift
+     If non-nil, use locking-shift (SO/SI) instead of single-shift
+     or designation by escape sequence.
+
+'no-iso6429
+     If non-nil, don't use ISO6429's direction specification.
+
+'escape-quoted
+     If non-nil, literal control characters that are the same as
+     the beginning of a recognized ISO2022 or ISO6429 escape sequence
+     (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E),
+     SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character
+     so that they can be properly distinguished from an escape sequence.
+     (Note that doing this results in a non-portable encoding.) This
+     encoding flag is used for byte-compiled files.  Note that ESC
+     is a good choice for a quoting character because there are no
+     escape sequences whose second byte is a character from the Control-0
+     or Control-1 character sets; this is explicitly disallowed by the
+     ISO2022 standard.
+
+'input-charset-conversion
+     A list of conversion specifications, specifying conversion of
+     characters in one charset to another when decoding is performed.
+     Each specification is a list of two elements: the source charset,
+     and the destination charset.
+
+'output-charset-conversion
+     A list of conversion specifications, specifying conversion of
+     characters in one charset to another when encoding is performed.
+     The form of each specification is the same as for
+     'input-charset-conversion.
+
+
+The following additional properties are recognized (and required)
+if TYPE is 'ccl:
+
+'decode
+     CCL program used for decoding (converting to internal format).
+
+'encode
+     CCL program used for encoding (converting to external format).
+*/
+       (name, type, doc_string, props))
+{
+  Lisp_Coding_System *codesys;
+  Lisp_Object rest, key, value;
+  enum coding_system_type ty;
+  int need_to_setup_eol_systems = 1;
+
+  /* Convert type to constant */
+  if (NILP (type) || EQ (type, Qundecided))
+                                      { ty = CODESYS_AUTODETECT; }
+#ifdef MULE
+  else if (EQ (type, Qshift_jis))     { ty = CODESYS_SHIFT_JIS; }
+  else if (EQ (type, Qiso2022))       { ty = CODESYS_ISO2022; }
+  else if (EQ (type, Qbig5))          { ty = CODESYS_BIG5; }
+  else if (EQ (type, Qucs4))          { ty = CODESYS_UCS4; }
+  else if (EQ (type, Qutf8))          { ty = CODESYS_UTF8; }
+  else if (EQ (type, Qccl))           { ty = CODESYS_CCL; }
+#endif
+  else if (EQ (type, Qno_conversion)) { ty = CODESYS_NO_CONVERSION; }
+#ifdef DEBUG_XEMACS
+  else if (EQ (type, Qinternal))      { ty = CODESYS_INTERNAL; }
+#endif
+  else
+    signal_simple_error ("Invalid coding system type", type);
+
+  CHECK_SYMBOL (name);
+
+  codesys = allocate_coding_system (ty, name);
+
+  if (NILP (doc_string))
+    doc_string = build_string ("");
+  else
+    CHECK_STRING (doc_string);
+  CODING_SYSTEM_DOC_STRING (codesys) = doc_string;
+
+#ifdef UTF2000
+  if (ty == CODESYS_NO_CONVERSION)
+    codesys->fixed.size = 1;
+#endif
+  EXTERNAL_PROPERTY_LIST_LOOP (rest, key, value, props)
+    {
+      if (EQ (key, Qmnemonic))
+       {
+          if (!NILP (value))
+           CHECK_STRING (value);
+         CODING_SYSTEM_MNEMONIC (codesys) = value;
+       }
+
+      else if (EQ (key, Qeol_type))
+       {
+         need_to_setup_eol_systems = NILP (value);
+         if (EQ (value, Qt))
+           value = Qnil;
+         CODING_SYSTEM_EOL_TYPE (codesys) = symbol_to_eol_type (value);
+       }
+
+      else if (EQ (key, Qpost_read_conversion)) CODING_SYSTEM_POST_READ_CONVERSION (codesys) = value;
+      else if (EQ (key, Qpre_write_conversion)) CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = value;
+#ifdef MULE
+      else if (ty == CODESYS_ISO2022)
+       {
+#define FROB_INITIAL_CHARSET(charset_num) \
+  CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, charset_num) = \
+    ((EQ (value, Qt) || EQ (value, Qnil)) ? value : Fget_charset (value))
+
+         if      (EQ (key, Qcharset_g0)) FROB_INITIAL_CHARSET (0);
+         else if (EQ (key, Qcharset_g1)) FROB_INITIAL_CHARSET (1);
+         else if (EQ (key, Qcharset_g2)) FROB_INITIAL_CHARSET (2);
+         else if (EQ (key, Qcharset_g3)) FROB_INITIAL_CHARSET (3);
+
+#define FROB_FORCE_CHARSET(charset_num) \
+  CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (codesys, charset_num) = !NILP (value)
+
+         else if (EQ (key, Qforce_g0_on_output)) FROB_FORCE_CHARSET (0);
+         else if (EQ (key, Qforce_g1_on_output)) FROB_FORCE_CHARSET (1);
+         else if (EQ (key, Qforce_g2_on_output)) FROB_FORCE_CHARSET (2);
+         else if (EQ (key, Qforce_g3_on_output)) FROB_FORCE_CHARSET (3);
+
+#define FROB_BOOLEAN_PROPERTY(prop) \
+  CODING_SYSTEM_ISO2022_##prop (codesys) = !NILP (value)
+
+         else if (EQ (key, Qshort))         FROB_BOOLEAN_PROPERTY (SHORT);
+         else if (EQ (key, Qno_ascii_eol))  FROB_BOOLEAN_PROPERTY (NO_ASCII_EOL);
+         else if (EQ (key, Qno_ascii_cntl)) FROB_BOOLEAN_PROPERTY (NO_ASCII_CNTL);
+         else if (EQ (key, Qseven))         FROB_BOOLEAN_PROPERTY (SEVEN);
+         else if (EQ (key, Qlock_shift))    FROB_BOOLEAN_PROPERTY (LOCK_SHIFT);
+         else if (EQ (key, Qno_iso6429))    FROB_BOOLEAN_PROPERTY (NO_ISO6429);
+         else if (EQ (key, Qescape_quoted)) FROB_BOOLEAN_PROPERTY (ESCAPE_QUOTED);
+
+         else if (EQ (key, Qinput_charset_conversion))
+           {
+             codesys->iso2022.input_conv =
+               Dynarr_new (charset_conversion_spec);
+             parse_charset_conversion_specs (codesys->iso2022.input_conv,
+                                             value);
+           }
+         else if (EQ (key, Qoutput_charset_conversion))
+           {
+             codesys->iso2022.output_conv =
+               Dynarr_new (charset_conversion_spec);
+             parse_charset_conversion_specs (codesys->iso2022.output_conv,
+                                             value);
+           }
+         else
+           signal_simple_error ("Unrecognized property", key);
+       }
+      else if (EQ (type, Qccl))
+       {
+         if (EQ (key, Qdecode))
+           {
+             CHECK_VECTOR (value);
+             CODING_SYSTEM_CCL_DECODE (codesys) = value;
+           }
+         else if (EQ (key, Qencode))
+           {
+             CHECK_VECTOR (value);
+             CODING_SYSTEM_CCL_ENCODE (codesys) = value;
+           }
+         else
+           signal_simple_error ("Unrecognized property", key);
+       }
+#endif /* MULE */
+      else
+       signal_simple_error ("Unrecognized property", key);
+    }
+
+  if (need_to_setup_eol_systems)
+    setup_eol_coding_systems (codesys);
+
+  {
+    Lisp_Object codesys_obj;
+    XSETCODING_SYSTEM (codesys_obj, codesys);
+    Fputhash (name, codesys_obj, Vcoding_system_hash_table);
+    return codesys_obj;
+  }
+}
+
+DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /*
+Copy OLD-CODING-SYSTEM to NEW-NAME.
+If NEW-NAME does not name an existing coding system, a new one will
+be created.
+*/
+       (old_coding_system, new_name))
+{
+  Lisp_Object new_coding_system;
+  old_coding_system = Fget_coding_system (old_coding_system);
+  new_coding_system = Ffind_coding_system (new_name);
+  if (NILP (new_coding_system))
+    {
+      XSETCODING_SYSTEM (new_coding_system,
+                        allocate_coding_system
+                        (XCODING_SYSTEM_TYPE (old_coding_system),
+                         new_name));
+      Fputhash (new_name, new_coding_system, Vcoding_system_hash_table);
+    }
+
+  {
+    Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
+    Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
+    memcpy (((char *) to  ) + sizeof (to->header),
+           ((char *) from) + sizeof (from->header),
+           sizeof (*from) - sizeof (from->header));
+    to->name = new_name;
+  }
+  return new_coding_system;
+}
+
+DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /*
+Define symbol ALIAS as an alias for coding system CODING-SYSTEM.
+*/
+       (alias, coding_system))
+{
+  CHECK_SYMBOL (alias);
+  if (!NILP (Ffind_coding_system (alias)))
+    signal_simple_error ("Symbol already names a coding system", alias);
+  coding_system = Fget_coding_system (coding_system);
+  Fputhash (alias, coding_system, Vcoding_system_hash_table);
+
+  /* Set up aliases for subsidiaries. */
+  if (XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT)
+    {
+      Lisp_Object str;
+      XSETSTRING (str, symbol_name (XSYMBOL (alias)));
+#define FROB(type, name)                                                       \
+      do {                                                                     \
+       Lisp_Object subsidiary = XCODING_SYSTEM_EOL_##type (coding_system);     \
+       if (!NILP (subsidiary))                                                 \
+         Fdefine_coding_system_alias                                           \
+           (Fintern (concat2 (str, build_string (name)), Qnil), subsidiary);   \
+      } while (0)
+      FROB (LF,   "-unix");
+      FROB (CRLF, "-dos");
+      FROB (CR,   "-mac");
+#undef FROB
+    }
+  /* FSF return value is a vector of [ALIAS-unix ALIAS-doc ALIAS-mac],
+     but it doesn't look intentional, so I'd rather return something
+     meaningful or nothing at all. */
+  return Qnil;
+}
+
+static Lisp_Object
+subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type)
+{
+  Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
+  Lisp_Object new_coding_system;
+
+  if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
+    return coding_system;
+
+  switch (type)
+    {
+    case EOL_AUTODETECT: return coding_system;
+    case EOL_LF:   new_coding_system = CODING_SYSTEM_EOL_LF   (cs); break;
+    case EOL_CR:   new_coding_system = CODING_SYSTEM_EOL_CR   (cs); break;
+    case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break;
+    default:       abort ();
+    }
+
+  return NILP (new_coding_system) ? coding_system : new_coding_system;
+}
+
+DEFUN ("subsidiary-coding-system", Fsubsidiary_coding_system, 2, 2, 0, /*
+Return the subsidiary coding system of CODING-SYSTEM with eol type EOL-TYPE.
+*/
+       (coding_system, eol_type))
+{
+  coding_system = Fget_coding_system (coding_system);
+
+  return subsidiary_coding_system (coding_system,
+                                  symbol_to_eol_type (eol_type));
+}
+
+\f
+/************************************************************************/
+/*                         Coding system accessors                      */
+/************************************************************************/
+
+DEFUN ("coding-system-doc-string", Fcoding_system_doc_string, 1, 1, 0, /*
+Return the doc string for CODING-SYSTEM.
+*/
+       (coding_system))
+{
+  coding_system = Fget_coding_system (coding_system);
+  return XCODING_SYSTEM_DOC_STRING (coding_system);
+}
+
+DEFUN ("coding-system-type", Fcoding_system_type, 1, 1, 0, /*
+Return the type of CODING-SYSTEM.
+*/
+       (coding_system))
+{
+  switch (XCODING_SYSTEM_TYPE (Fget_coding_system (coding_system)))
+    {
+    default: abort ();
+    case CODESYS_AUTODETECT:   return Qundecided;
+#ifdef MULE
+    case CODESYS_SHIFT_JIS:    return Qshift_jis;
+    case CODESYS_ISO2022:      return Qiso2022;
+    case CODESYS_BIG5:         return Qbig5;
+    case CODESYS_UCS4:         return Qucs4;
+    case CODESYS_UTF8:         return Qutf8;
+    case CODESYS_CCL:          return Qccl;
+#endif
+    case CODESYS_NO_CONVERSION:        return Qno_conversion;
+#ifdef DEBUG_XEMACS
+    case CODESYS_INTERNAL:     return Qinternal;
+#endif
+    }
+}
+
+#ifdef MULE
+static
+Lisp_Object coding_system_charset (Lisp_Object coding_system, int gnum)
+{
+  Lisp_Object cs
+    = XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, gnum);
+
+  return CHARSETP (cs) ? XCHARSET_NAME (cs) : Qnil;
+}
+
+DEFUN ("coding-system-charset", Fcoding_system_charset, 2, 2, 0, /*
+Return initial charset of CODING-SYSTEM designated to GNUM.
+GNUM allows 0 .. 3.
+*/
+       (coding_system, gnum))
+{
+  coding_system = Fget_coding_system (coding_system);
+  CHECK_INT (gnum);
+
+  return coding_system_charset (coding_system, XINT (gnum));
+}
+#endif /* MULE */
+
+DEFUN ("coding-system-property", Fcoding_system_property, 2, 2, 0, /*
+Return the PROP property of CODING-SYSTEM.
+*/
+       (coding_system, prop))
+{
+  int i, ok = 0;
+  enum coding_system_type type;
+
+  coding_system = Fget_coding_system (coding_system);
+  CHECK_SYMBOL (prop);
+  type = XCODING_SYSTEM_TYPE (coding_system);
+
+  for (i = 0; !ok && i < Dynarr_length (the_codesys_prop_dynarr); i++)
+    if (EQ (Dynarr_at (the_codesys_prop_dynarr, i).sym, prop))
+      {
+       ok = 1;
+       switch (Dynarr_at (the_codesys_prop_dynarr, i).prop_type)
+         {
+         case CODESYS_PROP_ALL_OK:
+           break;
+#ifdef MULE
+         case CODESYS_PROP_ISO2022:
+           if (type != CODESYS_ISO2022)
+             signal_simple_error
+               ("Property only valid in ISO2022 coding systems",
+                prop);
+           break;
+
+         case CODESYS_PROP_CCL:
+           if (type != CODESYS_CCL)
+             signal_simple_error
+               ("Property only valid in CCL coding systems",
+                prop);
+           break;
+#endif /* MULE */
+         default:
+           abort ();
+         }
+      }
+
+  if (!ok)
+    signal_simple_error ("Unrecognized property", prop);
+
+  if (EQ (prop, Qname))
+    return XCODING_SYSTEM_NAME (coding_system);
+  else if (EQ (prop, Qtype))
+    return Fcoding_system_type (coding_system);
+  else if (EQ (prop, Qdoc_string))
+    return XCODING_SYSTEM_DOC_STRING (coding_system);
+  else if (EQ (prop, Qmnemonic))
+    return XCODING_SYSTEM_MNEMONIC (coding_system);
+  else if (EQ (prop, Qeol_type))
+    return eol_type_to_symbol (XCODING_SYSTEM_EOL_TYPE (coding_system));
+  else if (EQ (prop, Qeol_lf))
+    return XCODING_SYSTEM_EOL_LF (coding_system);
+  else if (EQ (prop, Qeol_crlf))
+    return XCODING_SYSTEM_EOL_CRLF (coding_system);
+  else if (EQ (prop, Qeol_cr))
+    return XCODING_SYSTEM_EOL_CR (coding_system);
+  else if (EQ (prop, Qpost_read_conversion))
+    return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system);
+  else if (EQ (prop, Qpre_write_conversion))
+    return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system);
+#ifdef MULE
+  else if (type == CODESYS_ISO2022)
+    {
+      if (EQ (prop, Qcharset_g0))
+       return coding_system_charset (coding_system, 0);
+      else if (EQ (prop, Qcharset_g1))
+       return coding_system_charset (coding_system, 1);
+      else if (EQ (prop, Qcharset_g2))
+       return coding_system_charset (coding_system, 2);
+      else if (EQ (prop, Qcharset_g3))
+       return coding_system_charset (coding_system, 3);
+
+#define FORCE_CHARSET(charset_num) \
+  (XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT \
+   (coding_system, charset_num) ? Qt : Qnil)
+
+      else if (EQ (prop, Qforce_g0_on_output)) return FORCE_CHARSET (0);
+      else if (EQ (prop, Qforce_g1_on_output)) return FORCE_CHARSET (1);
+      else if (EQ (prop, Qforce_g2_on_output)) return FORCE_CHARSET (2);
+      else if (EQ (prop, Qforce_g3_on_output)) return FORCE_CHARSET (3);
+
+#define LISP_BOOLEAN(prop) \
+  (XCODING_SYSTEM_ISO2022_##prop (coding_system) ? Qt : Qnil)
+
+      else if (EQ (prop, Qshort))         return LISP_BOOLEAN (SHORT);
+      else if (EQ (prop, Qno_ascii_eol))  return LISP_BOOLEAN (NO_ASCII_EOL);
+      else if (EQ (prop, Qno_ascii_cntl)) return LISP_BOOLEAN (NO_ASCII_CNTL);
+      else if (EQ (prop, Qseven))         return LISP_BOOLEAN (SEVEN);
+      else if (EQ (prop, Qlock_shift))    return LISP_BOOLEAN (LOCK_SHIFT);
+      else if (EQ (prop, Qno_iso6429))    return LISP_BOOLEAN (NO_ISO6429);
+      else if (EQ (prop, Qescape_quoted)) return LISP_BOOLEAN (ESCAPE_QUOTED);
+
+      else if (EQ (prop, Qinput_charset_conversion))
+       return
+         unparse_charset_conversion_specs
+           (XCODING_SYSTEM (coding_system)->iso2022.input_conv);
+      else if (EQ (prop, Qoutput_charset_conversion))
+       return
+         unparse_charset_conversion_specs
+           (XCODING_SYSTEM (coding_system)->iso2022.output_conv);
+      else
+       abort ();
+    }
+  else if (type == CODESYS_CCL)
+    {
+      if (EQ (prop, Qdecode))
+       return XCODING_SYSTEM_CCL_DECODE (coding_system);
+      else if (EQ (prop, Qencode))
+       return XCODING_SYSTEM_CCL_ENCODE (coding_system);
+      else
+       abort ();
+    }
+#endif /* MULE */
+  else
+    abort ();
+
+  return Qnil; /* not reached */
+}
+
+\f
+/************************************************************************/
+/*                       Coding category functions                      */
+/************************************************************************/
+
+static int
+decode_coding_category (Lisp_Object symbol)
+{
+  int i;
+
+  CHECK_SYMBOL (symbol);
+  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+    if (EQ (coding_category_symbol[i], symbol))
+      return i;
+
+  signal_simple_error ("Unrecognized coding category", symbol);
+  return 0; /* not reached */
+}
+
+DEFUN ("coding-category-list", Fcoding_category_list, 0, 0, 0, /*
+Return a list of all recognized coding categories.
+*/
+       ())
+{
+  int i;
+  Lisp_Object list = Qnil;
+
+  for (i = CODING_CATEGORY_LAST; i >= 0; i--)
+    list = Fcons (coding_category_symbol[i], list);
+  return list;
+}
+
+DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /*
+Change the priority order of the coding categories.
+LIST should be list of coding categories, in descending order of
+priority.  Unspecified coding categories will be lower in priority
+than all specified ones, in the same relative order they were in
+previously.
+*/
+       (list))
+{
+  int category_to_priority[CODING_CATEGORY_LAST + 1];
+  int i, j;
+  Lisp_Object rest;
+
+  /* First generate a list that maps coding categories to priorities. */
+
+  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+    category_to_priority[i] = -1;
+
+  /* Highest priority comes from the specified list. */
+  i = 0;
+  EXTERNAL_LIST_LOOP (rest, list)
+    {
+      int cat = decode_coding_category (XCAR (rest));
+
+      if (category_to_priority[cat] >= 0)
+       signal_simple_error ("Duplicate coding category in list", XCAR (rest));
+      category_to_priority[cat] = i++;
+    }
+
+  /* Now go through the existing categories by priority to retrieve
+     the categories not yet specified and preserve their priority
+     order. */
+  for (j = 0; j <= CODING_CATEGORY_LAST; j++)
+    {
+      int cat = coding_category_by_priority[j];
+      if (category_to_priority[cat] < 0)
+       category_to_priority[cat] = i++;
+    }
+
+  /* Now we need to construct the inverse of the mapping we just
+     constructed. */
+
+  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+    coding_category_by_priority[category_to_priority[i]] = i;
+
+  /* Phew!  That was confusing. */
+  return Qnil;
+}
+
+DEFUN ("coding-priority-list", Fcoding_priority_list, 0, 0, 0, /*
+Return a list of coding categories in descending order of priority.
+*/
+       ())
+{
+  int i;
+  Lisp_Object list = Qnil;
+
+  for (i = CODING_CATEGORY_LAST; i >= 0; i--)
+    list = Fcons (coding_category_symbol[coding_category_by_priority[i]],
+                 list);
+  return list;
+}
+
+DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /*
+Change the coding system associated with a coding category.
+*/
+       (coding_category, coding_system))
+{
+  int cat = decode_coding_category (coding_category);
+
+  coding_system = Fget_coding_system (coding_system);
+  coding_category_system[cat] = coding_system;
+  return Qnil;
+}
+
+DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /*
+Return the coding system associated with a coding category.
+*/
+       (coding_category))
+{
+  int cat = decode_coding_category (coding_category);
+  Lisp_Object sys = coding_category_system[cat];
+
+  if (!NILP (sys))
+    return XCODING_SYSTEM_NAME (sys);
+  return Qnil;
+}
+
+\f
+/************************************************************************/
+/*                     Detecting the encoding of data                   */
+/************************************************************************/
+
+struct detection_state
+{
+  enum eol_type eol_type;
+  int seen_non_ascii;
+  int mask;
+#ifdef MULE
+  struct
+    {
+      int mask;
+      int in_second_byte;
+    }
+  big5;
+
+  struct
+    {
+      int mask;
+      int in_second_byte;
+    }
+  shift_jis;
+
+  struct
+    {
+      int mask;
+      int in_byte;
+  }
+  ucs4;
+
+  struct
+    {
+      int mask;
+      int in_byte;
+    }
+  utf8;
+
+  struct
+    {
+      int mask;
+      int initted;
+      struct iso2022_decoder iso;
+      unsigned int flags;
+      int high_byte_count;
+      unsigned int saw_single_shift:1;
+    }
+  iso2022;
+#endif
+  struct
+    {
+      int seen_anything;
+      int just_saw_cr;
+    }
+  eol;
+};
+
+static int
+acceptable_control_char_p (int c)
+{
+  switch (c)
+    {
+      /* Allow and ignore control characters that you might
+        reasonably see in a text file */
+    case '\r':
+    case '\n':
+    case '\t':
+    case  7: /* bell */
+    case  8: /* backspace */
+    case 11: /* vertical tab */
+    case 12: /* form feed */
+    case 26: /* MS-DOS C-z junk */
+    case 31: /* '^_' -- for info */
+      return 1;
+    default:
+      return 0;
+    }
+}
+
+static int
+mask_has_at_most_one_bit_p (int mask)
+{
+  /* Perhaps the only thing useful you learn from intensive Microsoft
+     technical interviews */
+  return (mask & (mask - 1)) == 0;
+}
+
+static enum eol_type
+detect_eol_type (struct detection_state *st, CONST unsigned char *src,
+                unsigned int n)
+{
+  int c;
+
+  while (n--)
+    {
+      c = *src++;
+      if (c == '\r')
+       st->eol.just_saw_cr = 1;
+      else
+       {
+         if (c == '\n')
+           {
+             if (st->eol.just_saw_cr)
+               return EOL_CRLF;
+             else if (st->eol.seen_anything)
+               return EOL_LF;
+           }
+         else if (st->eol.just_saw_cr)
+           return EOL_CR;
+         st->eol.just_saw_cr = 0;
+       }
+      st->eol.seen_anything = 1;
+    }
+
+  return EOL_AUTODETECT;
+}
+
+/* Attempt to determine the encoding and EOL type of the given text.
+   Before calling this function for the first type, you must initialize
+   st->eol_type as appropriate and initialize st->mask to ~0.
+
+   st->eol_type holds the determined EOL type, or EOL_AUTODETECT if
+   not yet known.
+
+   st->mask holds the determined coding category mask, or ~0 if only
+   ASCII has been seen so far.
+
+   Returns:
+
+   0 == st->eol_type is EOL_AUTODETECT and/or more than coding category
+        is present in st->mask
+   1 == definitive answers are here for both st->eol_type and st->mask
+*/
+
+static int
+detect_coding_type (struct detection_state *st, CONST unsigned char *src,
+                   unsigned int n, int just_do_eol)
+{
+  int c;
+
+  if (st->eol_type == EOL_AUTODETECT)
+    st->eol_type = detect_eol_type (st, src, n);
+
+  if (just_do_eol)
+    return st->eol_type != EOL_AUTODETECT;
+
+  if (!st->seen_non_ascii)
+    {
+      for (; n; n--, src++)
+       {
+         c = *src;
+         if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80)
+           {
+             st->seen_non_ascii = 1;
+#ifdef MULE
+             st->shift_jis.mask = ~0;
+             st->big5.mask = ~0;
+             st->ucs4.mask = ~0;
+             st->utf8.mask = ~0;
+             st->iso2022.mask = ~0;
+#endif
+             break;
+           }
+       }
+    }
+
+  if (!n)
+    return 0;
+#ifdef MULE
+  if (!mask_has_at_most_one_bit_p (st->iso2022.mask))
+    st->iso2022.mask = detect_coding_iso2022 (st, src, n);
+  if (!mask_has_at_most_one_bit_p (st->shift_jis.mask))
+    st->shift_jis.mask = detect_coding_sjis (st, src, n);
+  if (!mask_has_at_most_one_bit_p (st->big5.mask))
+    st->big5.mask = detect_coding_big5 (st, src, n);
+  if (!mask_has_at_most_one_bit_p (st->utf8.mask))
+    st->utf8.mask = detect_coding_utf8 (st, src, n);
+  if (!mask_has_at_most_one_bit_p (st->ucs4.mask))
+    st->ucs4.mask = detect_coding_ucs4 (st, src, n);
+
+  st->mask
+    = st->iso2022.mask | st->shift_jis.mask | st->big5.mask
+    | st->utf8.mask | st->ucs4.mask;
+#endif
+  {
+    int retval = mask_has_at_most_one_bit_p (st->mask);
+    st->mask |= CODING_CATEGORY_NO_CONVERSION_MASK;
+    return retval && st->eol_type != EOL_AUTODETECT;
+  }
+}
+
+static Lisp_Object
+coding_system_from_mask (int mask)
+{
+  if (mask == ~0)
+    {
+      /* If the file was entirely or basically ASCII, use the
+        default value of `buffer-file-coding-system'. */
+      Lisp_Object retval =
+       XBUFFER (Vbuffer_defaults)->buffer_file_coding_system;
+      if (!NILP (retval))
+       {
+         retval = Ffind_coding_system (retval);
+         if (NILP (retval))
+           {
+             warn_when_safe
+               (Qbad_variable, Qwarning,
+                "Invalid `default-buffer-file-coding-system', set to nil");
+             XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil;
+           }
+       }
+      if (NILP (retval))
+       retval = Fget_coding_system (Qraw_text);
+      return retval;
+    }
+  else
+    {
+      int i;
+      int cat = -1;
+#ifdef MULE
+      mask = postprocess_iso2022_mask (mask);
+#endif
+      /* Look through the coding categories by priority and find
+        the first one that is allowed. */
+      for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+       {
+         cat = coding_category_by_priority[i];
+         if ((mask & (1 << cat)) &&
+             !NILP (coding_category_system[cat]))
+           break;
+       }
+      if (cat >= 0)
+       return coding_category_system[cat];
+      else
+       return Fget_coding_system (Qraw_text);
+    }
+}
+
+/* Given a seekable read stream and potential coding system and EOL type
+   as specified, do any autodetection that is called for.  If the
+   coding system and/or EOL type are not autodetect, they will be left
+   alone; but this function will never return an autodetect coding system
+   or EOL type.
+
+   This function does not automatically fetch subsidiary coding systems;
+   that should be unnecessary with the explicit eol-type argument. */
+
+void
+determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
+                             enum eol_type *eol_type_in_out)
+{
+  struct detection_state decst;
+
+  if (*eol_type_in_out == EOL_AUTODETECT)
+    *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE (*codesys_in_out);
+
+  xzero (decst);
+  decst.eol_type = *eol_type_in_out;
+  decst.mask = ~0;
+
+  /* If autodetection is called for, do it now. */
+  if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT ||
+      *eol_type_in_out == EOL_AUTODETECT)
+    {
+      unsigned char random_buffer[4096];
+      int nread;
+      Lisp_Object coding_system = Qnil;
+
+      nread = Lstream_read (stream, random_buffer, sizeof (random_buffer));
+      if (nread)
+       {
+         unsigned char *cp = random_buffer;
+
+         while (cp < random_buffer + nread)
+           {
+             if ((*cp++ == 'c') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'o') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'd') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'i') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'n') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'g') && (cp < random_buffer + nread) &&
+                 (*cp++ == ':') && (cp < random_buffer + nread))
+               {
+                 unsigned char coding_system_name[4096 - 6];
+                 unsigned char *np = coding_system_name;
+
+                 while ( (cp < random_buffer + nread)
+                         && ((*cp == ' ') || (*cp == '\t')) )
+                   {
+                     cp++;
+                   }
+                 while ( (cp < random_buffer + nread) &&
+                         (*cp != ' ') && (*cp != '\t') && (*cp != ';') )
+                   {
+                     *np++ = *cp++;
+                   }
+                 *np = 0;
+                 coding_system
+                   = Ffind_coding_system (intern (coding_system_name));
+                 break;
+               }
+           }
+         if (EQ(coding_system, Qnil))
+           do{
+             if (detect_coding_type (&decst, random_buffer, nread,
+                                     XCODING_SYSTEM_TYPE (*codesys_in_out)
+                                     != CODESYS_AUTODETECT))
+               break;
+             nread = Lstream_read (stream,
+                                   random_buffer, sizeof (random_buffer));
+             if (!nread)
+               break;
+           } while(1);
+       }
+      *eol_type_in_out = decst.eol_type;
+      if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT)
+       {
+         if (EQ(coding_system, Qnil))
+           *codesys_in_out = coding_system_from_mask (decst.mask);
+         else
+           *codesys_in_out = coding_system;
+       }
+    }
+  /* If we absolutely can't determine the EOL type, just assume LF. */
+  if (*eol_type_in_out == EOL_AUTODETECT)
+    *eol_type_in_out = EOL_LF;
+
+  Lstream_rewind (stream);
+}
+
+DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /*
+Detect coding system of the text in the region between START and END.
+Returned a list of possible coding systems ordered by priority.
+If only ASCII characters are found, it returns 'undecided or one of
+its subsidiary coding systems according to a detected end-of-line
+type.  Optional arg BUFFER defaults to the current buffer.
+*/
+       (start, end, buffer))
+{
+  Lisp_Object val = Qnil;
+  struct buffer *buf = decode_buffer (buffer, 0);
+  Bufpos b, e;
+  Lisp_Object instream, lb_instream;
+  Lstream *istr, *lb_istr;
+  struct detection_state decst;
+  struct gcpro gcpro1, gcpro2;
+
+  get_buffer_range_char (buf, start, end, &b, &e, 0);
+  lb_instream = make_lisp_buffer_input_stream (buf, b, e, 0);
+  lb_istr = XLSTREAM (lb_instream);
+  instream = make_encoding_input_stream (lb_istr, Fget_coding_system (Qbinary));
+  istr = XLSTREAM (instream);
+  GCPRO2 (instream, lb_instream);
+  xzero (decst);
+  decst.eol_type = EOL_AUTODETECT;
+  decst.mask = ~0;
+  while (1)
+    {
+      unsigned char random_buffer[4096];
+      int nread = Lstream_read (istr, random_buffer, sizeof (random_buffer));
+
+      if (!nread)
+       break;
+      if (detect_coding_type (&decst, random_buffer, nread, 0))
+       break;
+    }
+
+  if (decst.mask == ~0)
+    val = subsidiary_coding_system (Fget_coding_system (Qundecided),
+                                   decst.eol_type);
+  else
+    {
+      int i;
+
+      val = Qnil;
+#ifdef MULE
+      decst.mask = postprocess_iso2022_mask (decst.mask);
+#endif
+      for (i = CODING_CATEGORY_LAST; i >= 0; i--)
+       {
+         int sys = coding_category_by_priority[i];
+         if (decst.mask & (1 << sys))
+           {
+             Lisp_Object codesys = coding_category_system[sys];
+             if (!NILP (codesys))
+               codesys = subsidiary_coding_system (codesys, decst.eol_type);
+             val = Fcons (codesys, val);
+           }
+       }
+    }
+  Lstream_close (istr);
+  UNGCPRO;
+  Lstream_delete (istr);
+  Lstream_delete (lb_istr);
+  return val;
+}
+
+\f
+/************************************************************************/
+/*           Converting to internal Mule format ("decoding")            */
+/************************************************************************/
+
+/* A decoding stream is a stream used for decoding text (i.e.
+   converting from some external format to internal format).
+   The decoding-stream object keeps track of the actual coding
+   stream, the stream that is at the other end, and data that
+   needs to be persistent across the lifetime of the stream. */
+
+/* Handle the EOL stuff related to just-read-in character C.
+   EOL_TYPE is the EOL type of the coding stream.
+   FLAGS is the current value of FLAGS in the coding stream, and may
+   be modified by this macro.  (The macro only looks at the
+   CODING_STATE_CR flag.)  DST is the Dynarr to which the decoded
+   bytes are to be written.  You need to also define a local goto
+   label "label_continue_loop" that is at the end of the main
+   character-reading loop.
+
+   If C is a CR character, then this macro handles it entirely and
+   jumps to label_continue_loop.  Otherwise, this macro does not add
+   anything to DST, and continues normally.  You should continue
+   processing C normally after this macro. */
+
+#define DECODE_HANDLE_EOL_TYPE(eol_type, c, flags, dst)                \
+do {                                                           \
+  if (c == '\r')                                               \
+    {                                                          \
+      if (eol_type == EOL_CR)                                  \
+       Dynarr_add (dst, '\n');                                 \
+      else if (eol_type != EOL_CRLF || flags & CODING_STATE_CR)        \
+       Dynarr_add (dst, c);                                    \
+      else                                                     \
+       flags |= CODING_STATE_CR;                               \
+      goto label_continue_loop;                                        \
+    }                                                          \
+  else if (flags & CODING_STATE_CR)                            \
+    {  /* eol_type == CODING_SYSTEM_EOL_CRLF */                \
+      if (c != '\n')                                           \
+       Dynarr_add (dst, '\r');                                 \
+      flags &= ~CODING_STATE_CR;                               \
+    }                                                          \
+} while (0)
+
+/* C should be a binary character in the range 0 - 255; convert
+   to internal format and add to Dynarr DST. */
+
+#ifdef UTF2000
+#define DECODE_ADD_BINARY_CHAR(c, dst) \
+do {                                           \
+  if (BYTE_ASCII_P (c))                                \
+    Dynarr_add (dst, c);                       \
+  else                                         \
+    {                                          \
+      Dynarr_add (dst, (c >> 6) | 0xc0);       \
+      Dynarr_add (dst, (c & 0x3f) | 0x80);     \
+    }                                          \
+} while (0)
+
+INLINE void
+DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst)
+{
+  if ( c <= 0x7f )
+    {
+      Dynarr_add (dst, c);
+    }
+  else if ( c <= 0x7ff )
+    {
+      Dynarr_add (dst, (c >> 6) | 0xc0);
+      Dynarr_add (dst, (c & 0x3f) | 0x80);
+    }
+  else if ( c <= 0xffff )
+    {
+      Dynarr_add (dst,  (c >> 12) | 0xe0);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else if ( c <= 0x1fffff )
+    {
+      Dynarr_add (dst,  (c >> 18) | 0xf0);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else if ( c <= 0x3ffffff )
+    {
+      Dynarr_add (dst,  (c >> 24) | 0xf8);
+      Dynarr_add (dst, ((c >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else
+    {
+      Dynarr_add (dst,  (c >> 30) | 0xfc);
+      Dynarr_add (dst, ((c >> 24) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+}
+#else
+#define DECODE_ADD_BINARY_CHAR(c, dst)         \
+do {                                           \
+  if (BYTE_ASCII_P (c))                                \
+    Dynarr_add (dst, c);                       \
+  else if (BYTE_C1_P (c))                      \
+    {                                          \
+      Dynarr_add (dst, LEADING_BYTE_CONTROL_1);        \
+      Dynarr_add (dst, c + 0x20);              \
+    }                                          \
+  else                                         \
+    {                                          \
+      Dynarr_add (dst, LEADING_BYTE_LATIN_ISO8859_1); \
+      Dynarr_add (dst, c);                     \
+    }                                          \
+} while (0)
+#endif
+
+#define DECODE_OUTPUT_PARTIAL_CHAR(ch) \
+do {                                   \
+  if (ch)                              \
+    {                                  \
+      DECODE_ADD_BINARY_CHAR (ch, dst);        \
+      ch = 0;                          \
+    }                                  \
+} while (0)
+
+#define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst)        \
+do {                                   \
+  if (flags & CODING_STATE_END)                \
+    {                                  \
+      DECODE_OUTPUT_PARTIAL_CHAR (ch); \
+      if (flags & CODING_STATE_CR)     \
+       Dynarr_add (dst, '\r');         \
+    }                                  \
+} while (0)
+
+#define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding)
+
+struct decoding_stream
+{
+  /* Coding system that governs the conversion. */
+  Lisp_Coding_System *codesys;
+
+  /* Stream that we read the encoded data from or
+     write the decoded data to. */
+  Lstream *other_end;
+
+  /* If we are reading, then we can return only a fixed amount of
+     data, so if the conversion resulted in too much data, we store it
+     here for retrieval the next time around. */
+  unsigned_char_dynarr *runoff;
+
+  /* FLAGS holds flags indicating the current state of the decoding.
+     Some of these flags are dependent on the coding system. */
+  unsigned int flags;
+
+  /* CH holds a partially built-up character.  Since we only deal
+     with one- and two-byte characters at the moment, we only use
+     this to store the first byte of a two-byte character. */
+  unsigned int ch;
+
+  /* EOL_TYPE specifies the type of end-of-line conversion that
+     currently applies.  We need to keep this separate from the
+     EOL type stored in CODESYS because the latter might indicate
+     automatic EOL-type detection while the former will always
+     indicate a particular EOL type. */
+  enum eol_type eol_type;
+#ifdef MULE
+  /* Additional ISO2022 information.  We define the structure above
+     because it's also needed by the detection routines. */
+  struct iso2022_decoder iso2022;
+
+  /* Additional information (the state of the running CCL program)
+     used by the CCL decoder. */
+  struct ccl_program ccl;
+
+  /* counter for UTF-8 or UCS-4 */
+  unsigned char counter;
+#endif
+  struct detection_state decst;
+};
+
+static int decoding_reader     (Lstream *stream,       unsigned char *data, size_t size);
+static int decoding_writer     (Lstream *stream, CONST unsigned char *data, size_t size);
+static int decoding_rewinder   (Lstream *stream);
+static int decoding_seekable_p (Lstream *stream);
+static int decoding_flusher    (Lstream *stream);
+static int decoding_closer     (Lstream *stream);
+
+static Lisp_Object decoding_marker (Lisp_Object stream,
+                                   void (*markobj) (Lisp_Object));
+
+DEFINE_LSTREAM_IMPLEMENTATION ("decoding", lstream_decoding,
+                              sizeof (struct decoding_stream));
+
+static Lisp_Object
+decoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
+{
+  Lstream *str = DECODING_STREAM_DATA (XLSTREAM (stream))->other_end;
+  Lisp_Object str_obj;
+
+  /* We do not need to mark the coding systems or charsets stored
+     within the stream because they are stored in a global list
+     and automatically marked. */
+
+  XSETLSTREAM (str_obj, str);
+  markobj (str_obj);
+  if (str->imp->marker)
+    return (str->imp->marker) (str_obj, markobj);
+  else
+    return Qnil;
+}
+
+/* Read SIZE bytes of data and store it into DATA.  We are a decoding stream
+   so we read data from the other end, decode it, and store it into DATA. */
+
+static int
+decoding_reader (Lstream *stream, unsigned char *data, size_t size)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+  unsigned char *orig_data = data;
+  int read_size;
+  int error_occurred = 0;
+
+  /* We need to interface to mule_decode(), which expects to take some
+     amount of data and store the result into a Dynarr.  We have
+     mule_decode() store into str->runoff, and take data from there
+     as necessary. */
+
+  /* We loop until we have enough data, reading chunks from the other
+     end and decoding it. */
+  while (1)
+    {
+      /* Take data from the runoff if we can.  Make sure to take at
+        most SIZE bytes, and delete the data from the runoff. */
+      if (Dynarr_length (str->runoff) > 0)
+       {
+         size_t chunk = min (size, (size_t) Dynarr_length (str->runoff));
+         memcpy (data, Dynarr_atp (str->runoff, 0), chunk);
+         Dynarr_delete_many (str->runoff, 0, chunk);
+         data += chunk;
+         size -= chunk;
+       }
+
+      if (size == 0)
+       break; /* No more room for data */
+
+      if (str->flags & CODING_STATE_END)
+       /* This means that on the previous iteration, we hit the EOF on
+          the other end.  We loop once more so that mule_decode() can
+          output any final stuff it may be holding, or any "go back
+          to a sane state" escape sequences. (This latter makes sense
+          during encoding.) */
+       break;
+
+      /* Exhausted the runoff, so get some more.  DATA has at least
+        SIZE bytes left of storage in it, so it's OK to read directly
+        into it.  (We'll be overwriting above, after we've decoded it
+        into the runoff.) */
+      read_size = Lstream_read (str->other_end, data, size);
+      if (read_size < 0)
+       {
+         error_occurred = 1;
+         break;
+       }
+      if (read_size == 0)
+       /* There might be some more end data produced in the translation.
+          See the comment above. */
+       str->flags |= CODING_STATE_END;
+      mule_decode (stream, data, str->runoff, read_size);
+    }
+
+  if (data - orig_data == 0)
+    return error_occurred ? -1 : 0;
+  else
+    return data - orig_data;
+}
+
+static int
+decoding_writer (Lstream *stream, CONST unsigned char *data, size_t size)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+  int retval;
+
+  /* Decode all our data into the runoff, and then attempt to write
+     it all out to the other end.  Remove whatever chunk we succeeded
+     in writing. */
+  mule_decode (stream, data, str->runoff, size);
+  retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0),
+                         Dynarr_length (str->runoff));
+  if (retval > 0)
+    Dynarr_delete_many (str->runoff, 0, retval);
+  /* Do NOT return retval.  The return value indicates how much
+     of the incoming data was written, not how many bytes were
+     written. */
+  return size;
+}
+
+static void
+reset_decoding_stream (struct decoding_stream *str)
+{
+#ifdef MULE
+  if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_ISO2022)
+    {
+      Lisp_Object coding_system;
+      XSETCODING_SYSTEM (coding_system, str->codesys);
+      reset_iso2022 (coding_system, &str->iso2022);
+    }
+  else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL)
+    {
+      setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys));
+    }
+  str->counter = 0;
+#endif /* MULE */
+  str->flags = str->ch = 0;
+}
+
+static int
+decoding_rewinder (Lstream *stream)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+  reset_decoding_stream (str);
+  Dynarr_reset (str->runoff);
+  return Lstream_rewind (str->other_end);
+}
+
+static int
+decoding_seekable_p (Lstream *stream)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+  return Lstream_seekable_p (str->other_end);
+}
+
+static int
+decoding_flusher (Lstream *stream)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+  return Lstream_flush (str->other_end);
+}
+
+static int
+decoding_closer (Lstream *stream)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+  if (stream->flags & LSTREAM_FL_WRITE)
+    {
+      str->flags |= CODING_STATE_END;
+      decoding_writer (stream, 0, 0);
+    }
+  Dynarr_free (str->runoff);
+#ifdef MULE
+#ifdef ENABLE_COMPOSITE_CHARS
+  if (str->iso2022.composite_chars)
+    Dynarr_free (str->iso2022.composite_chars);
+#endif
+#endif
+  return Lstream_close (str->other_end);
+}
+
+Lisp_Object
+decoding_stream_coding_system (Lstream *stream)
+{
+  Lisp_Object coding_system;
+  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+
+  XSETCODING_SYSTEM (coding_system, str->codesys);
+  return subsidiary_coding_system (coding_system, str->eol_type);
+}
+
+void
+set_decoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
+{
+  Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
+  struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
+  str->codesys = cs;
+  if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
+    str->eol_type = CODING_SYSTEM_EOL_TYPE (cs);
+  reset_decoding_stream (str);
+}
+
+/* WARNING WARNING WARNING WARNING!!!!!  If you open up a decoding
+   stream for writing, no automatic code detection will be performed.
+   The reason for this is that automatic code detection requires a
+   seekable input.  Things will also fail if you open a decoding
+   stream for reading using a non-fully-specified coding system and
+   a non-seekable input stream. */
+
+static Lisp_Object
+make_decoding_stream_1 (Lstream *stream, Lisp_Object codesys,
+                       CONST char *mode)
+{
+  Lstream *lstr = Lstream_new (lstream_decoding, mode);
+  struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
+  Lisp_Object obj;
+
+  xzero (*str);
+  str->other_end = stream;
+  str->runoff = (unsigned_char_dynarr *) Dynarr_new (unsigned_char);
+  str->eol_type = EOL_AUTODETECT;
+  if (!strcmp (mode, "r")
+      && Lstream_seekable_p (stream))
+    /* We can determine the coding system now. */
+    determine_real_coding_system (stream, &codesys, &str->eol_type);
+  set_decoding_stream_coding_system (lstr, codesys);
+  str->decst.eol_type = str->eol_type;
+  str->decst.mask = ~0;
+  XSETLSTREAM (obj, lstr);
+  return obj;
+}
+
+Lisp_Object
+make_decoding_input_stream (Lstream *stream, Lisp_Object codesys)
+{
+  return make_decoding_stream_1 (stream, codesys, "r");
+}
+
+Lisp_Object
+make_decoding_output_stream (Lstream *stream, Lisp_Object codesys)
+{
+  return make_decoding_stream_1 (stream, codesys, "w");
+}
+
+/* Note: the decode_coding_* functions all take the same
+   arguments as mule_decode(), which is to say some SRC data of
+   size N, which is to be stored into dynamic array DST.
+   DECODING is the stream within which the decoding is
+   taking place, but no data is actually read from or
+   written to that stream; that is handled in decoding_reader()
+   or decoding_writer().  This allows the same functions to
+   be used for both reading and writing. */
+
+static void
+mule_decode (Lstream *decoding, CONST unsigned char *src,
+            unsigned_char_dynarr *dst, unsigned int n)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+
+  /* If necessary, do encoding-detection now.  We do this when
+     we're a writing stream or a non-seekable reading stream,
+     meaning that we can't just process the whole input,
+     rewind, and start over. */
+
+  if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_AUTODETECT ||
+      str->eol_type == EOL_AUTODETECT)
+    {
+      Lisp_Object codesys;
+
+      XSETCODING_SYSTEM (codesys, str->codesys);
+      detect_coding_type (&str->decst, src, n,
+                         CODING_SYSTEM_TYPE (str->codesys) !=
+                         CODESYS_AUTODETECT);
+      if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_AUTODETECT &&
+         str->decst.mask != ~0)
+       /* #### This is cheesy.  What we really ought to do is
+          buffer up a certain amount of data so as to get a
+          less random result. */
+       codesys = coding_system_from_mask (str->decst.mask);
+      str->eol_type = str->decst.eol_type;
+      if (XCODING_SYSTEM (codesys) != str->codesys)
+       {
+         /* Preserve the CODING_STATE_END flag in case it was set.
+            If we erase it, bad things might happen. */
+         int was_end = str->flags & CODING_STATE_END;
+          set_decoding_stream_coding_system (decoding, codesys);
+         if (was_end)
+           str->flags |= CODING_STATE_END;
+       }
+    }
+
+  switch (CODING_SYSTEM_TYPE (str->codesys))
+    {
+#ifdef DEBUG_XEMACS
+    case CODESYS_INTERNAL:
+      Dynarr_add_many (dst, src, n);
+      break;
+#endif
+    case CODESYS_AUTODETECT:
+      /* If we got this far and still haven't decided on the coding
+        system, then do no conversion. */
+    case CODESYS_NO_CONVERSION:
+      decode_coding_no_conversion (decoding, src, dst, n);
+      break;
+#ifdef MULE
+    case CODESYS_SHIFT_JIS:
+      decode_coding_sjis (decoding, src, dst, n);
+      break;
+    case CODESYS_BIG5:
+      decode_coding_big5 (decoding, src, dst, n);
+      break;
+    case CODESYS_UCS4:
+      decode_coding_ucs4 (decoding, src, dst, n);
+      break;
+    case CODESYS_UTF8:
+      decode_coding_utf8 (decoding, src, dst, n);
+      break;
+    case CODESYS_CCL:
+      str->ccl.last_block = str->flags & CODING_STATE_END;
+      ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_DECODING);
+      break;
+    case CODESYS_ISO2022:
+      decode_coding_iso2022 (decoding, src, dst, n);
+      break;
+#endif /* MULE */
+    default:
+      abort ();
+    }
+}
+
+DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, 0, /*
+Decode the text between START and END which is encoded in CODING-SYSTEM.
+This is useful if you've read in encoded text from a file without decoding
+it (e.g. you read in a JIS-formatted file but used the `binary' or
+`no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B").
+Return length of decoded text.
+BUFFER defaults to the current buffer if unspecified.
+*/
+       (start, end, coding_system, buffer))
+{
+  Bufpos b, e;
+  struct buffer *buf = decode_buffer (buffer, 0);
+  Lisp_Object instream, lb_outstream, de_outstream, outstream;
+  Lstream *istr, *ostr;
+  struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;
+
+  get_buffer_range_char (buf, start, end, &b, &e, 0);
+
+  barf_if_buffer_read_only (buf, b, e);
+
+  coding_system = Fget_coding_system (coding_system);
+  instream = make_lisp_buffer_input_stream  (buf, b, e, 0);
+  lb_outstream = make_lisp_buffer_output_stream (buf, b, 0);
+  de_outstream = make_decoding_output_stream (XLSTREAM (lb_outstream),
+                                             coding_system);
+  outstream = make_encoding_output_stream (XLSTREAM (de_outstream),
+                                          Fget_coding_system (Qbinary));
+  istr = XLSTREAM (instream);
+  ostr = XLSTREAM (outstream);
+  GCPRO4 (instream, lb_outstream, de_outstream, outstream);
+
+  /* The chain of streams looks like this:
+
+     [BUFFER] <----- send through
+                     ------> [ENCODE AS BINARY]
+                            ------> [DECODE AS SPECIFIED]
+                                    ------> [BUFFER]
+   */
+
+  while (1)
+    {
+      char tempbuf[1024]; /* some random amount */
+      Bufpos newpos, even_newer_pos;
+      Bufpos oldpos = lisp_buffer_stream_startpos (istr);
+      int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
+
+      if (!size_in_bytes)
+       break;
+      newpos = lisp_buffer_stream_startpos (istr);
+      Lstream_write (ostr, tempbuf, size_in_bytes);
+      even_newer_pos = lisp_buffer_stream_startpos (istr);
+      buffer_delete_range (buf, even_newer_pos - (newpos - oldpos),
+                          even_newer_pos, 0);
+    }
+  Lstream_close (istr);
+  Lstream_close (ostr);
+  UNGCPRO;
+  Lstream_delete (istr);
+  Lstream_delete (ostr);
+  Lstream_delete (XLSTREAM (de_outstream));
+  Lstream_delete (XLSTREAM (lb_outstream));
+  return Qnil;
+}
+
+\f
+/************************************************************************/
+/*           Converting to an external encoding ("encoding")            */
+/************************************************************************/
+
+/* An encoding stream is an output stream.  When you create the
+   stream, you specify the coding system that governs the encoding
+   and another stream that the resulting encoded data is to be
+   sent to, and then start sending data to it. */
+
+#define ENCODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, encoding)
+
+struct encoding_stream
+{
+  /* Coding system that governs the conversion. */
+  Lisp_Coding_System *codesys;
+
+  /* Stream that we read the encoded data from or
+     write the decoded data to. */
+  Lstream *other_end;
+
+  /* If we are reading, then we can return only a fixed amount of
+     data, so if the conversion resulted in too much data, we store it
+     here for retrieval the next time around. */
+  unsigned_char_dynarr *runoff;
+
+  /* FLAGS holds flags indicating the current state of the encoding.
+     Some of these flags are dependent on the coding system. */
+  unsigned int flags;
+
+  /* CH holds a partially built-up character.  Since we only deal
+     with one- and two-byte characters at the moment, we only use
+     this to store the first byte of a two-byte character. */
+  unsigned int ch;
+#ifdef MULE
+  /* Additional information used by the ISO2022 encoder. */
+  struct
+    {
+      /* CHARSET holds the character sets currently assigned to the G0
+        through G3 registers.  It is initialized from the array
+        INITIAL_CHARSET in CODESYS. */
+      Lisp_Object charset[4];
+
+      /* Which registers are currently invoked into the left (GL) and
+        right (GR) halves of the 8-bit encoding space? */
+      int register_left, register_right;
+
+      /* Whether we need to explicitly designate the charset in the
+        G? register before using it.  It is initialized from the
+        array FORCE_CHARSET_ON_OUTPUT in CODESYS. */
+      unsigned char force_charset_on_output[4];
+
+      /* Other state variables that need to be preserved across
+        invocations. */
+      Lisp_Object current_charset;
+      int current_half;
+      int current_char_boundary;
+    } iso2022;
+
+  void (*encode_char) (struct encoding_stream *str, Emchar c,
+                      unsigned_char_dynarr *dst, unsigned int *flags);
+  void (*finish) (struct encoding_stream *str,
+                 unsigned_char_dynarr *dst, unsigned int *flags);
+
+  /* Additional information (the state of the running CCL program)
+     used by the CCL encoder. */
+  struct ccl_program ccl;
+#endif /* MULE */
+};
+
+static int encoding_reader (Lstream *stream, unsigned char *data, size_t size);
+static int encoding_writer (Lstream *stream, CONST unsigned char *data,
+                           size_t size);
+static int encoding_rewinder   (Lstream *stream);
+static int encoding_seekable_p (Lstream *stream);
+static int encoding_flusher    (Lstream *stream);
+static int encoding_closer     (Lstream *stream);
+
+static Lisp_Object encoding_marker (Lisp_Object stream,
+                                   void (*markobj) (Lisp_Object));
+
+DEFINE_LSTREAM_IMPLEMENTATION ("encoding", lstream_encoding,
+                              sizeof (struct encoding_stream));
+
+static Lisp_Object
+encoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
+{
+  Lstream *str = ENCODING_STREAM_DATA (XLSTREAM (stream))->other_end;
+  Lisp_Object str_obj;
+
+  /* We do not need to mark the coding systems or charsets stored
+     within the stream because they are stored in a global list
+     and automatically marked. */
+
+  XSETLSTREAM (str_obj, str);
+  markobj (str_obj);
+  if (str->imp->marker)
+    return (str->imp->marker) (str_obj, markobj);
+  else
+    return Qnil;
+}
+
+/* Read SIZE bytes of data and store it into DATA.  We are a encoding stream
+   so we read data from the other end, encode it, and store it into DATA. */
+
+static int
+encoding_reader (Lstream *stream, unsigned char *data, size_t size)
+{
+  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+  unsigned char *orig_data = data;
+  int read_size;
+  int error_occurred = 0;
+
+  /* We need to interface to mule_encode(), which expects to take some
+     amount of data and store the result into a Dynarr.  We have
+     mule_encode() store into str->runoff, and take data from there
+     as necessary. */
+
+  /* We loop until we have enough data, reading chunks from the other
+     end and encoding it. */
+  while (1)
+    {
+      /* Take data from the runoff if we can.  Make sure to take at
+        most SIZE bytes, and delete the data from the runoff. */
+      if (Dynarr_length (str->runoff) > 0)
+       {
+         int chunk = min ((int) size, Dynarr_length (str->runoff));
+         memcpy (data, Dynarr_atp (str->runoff, 0), chunk);
+         Dynarr_delete_many (str->runoff, 0, chunk);
+         data += chunk;
+         size -= chunk;
+       }
+
+      if (size == 0)
+       break; /* No more room for data */
+
+      if (str->flags & CODING_STATE_END)
+       /* This means that on the previous iteration, we hit the EOF on
+          the other end.  We loop once more so that mule_encode() can
+          output any final stuff it may be holding, or any "go back
+          to a sane state" escape sequences. (This latter makes sense
+          during encoding.) */
+       break;
+
+      /* Exhausted the runoff, so get some more.  DATA at least SIZE bytes
+        left of storage in it, so it's OK to read directly into it.
+        (We'll be overwriting above, after we've encoded it into the
+        runoff.) */
+      read_size = Lstream_read (str->other_end, data, size);
+      if (read_size < 0)
+       {
+         error_occurred = 1;
+         break;
+       }
+      if (read_size == 0)
+       /* There might be some more end data produced in the translation.
+          See the comment above. */
+       str->flags |= CODING_STATE_END;
+      mule_encode (stream, data, str->runoff, read_size);
+    }
+
+  if (data == orig_data)
+    return error_occurred ? -1 : 0;
+  else
+    return data - orig_data;
+}
+
+static int
+encoding_writer (Lstream *stream, CONST unsigned char *data, size_t size)
+{
+  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+  int retval;
+
+  /* Encode all our data into the runoff, and then attempt to write
+     it all out to the other end.  Remove whatever chunk we succeeded
+     in writing. */
+  mule_encode (stream, data, str->runoff, size);
+  retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0),
+                         Dynarr_length (str->runoff));
+  if (retval > 0)
+    Dynarr_delete_many (str->runoff, 0, retval);
+  /* Do NOT return retval.  The return value indicates how much
+     of the incoming data was written, not how many bytes were
+     written. */
+  return size;
+}
+
+static void
+reset_encoding_stream (struct encoding_stream *str)
+{
+#ifdef MULE
+  switch (CODING_SYSTEM_TYPE (str->codesys))
+    {
+    case CODESYS_ISO2022:
+      {
+       int i;
+
+       str->encode_char = &char_encode_iso2022;
+       str->finish = &char_finish_iso2022;
+       for (i = 0; i < 4; i++)
+         {
+           str->iso2022.charset[i] =
+             CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, i);
+           str->iso2022.force_charset_on_output[i] =
+             CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (str->codesys, i);
+         }
+       str->iso2022.register_left = 0;
+       str->iso2022.register_right = 1;
+       str->iso2022.current_charset = Qnil;
+       str->iso2022.current_half = 0;
+       break;
+      }
+    case CODESYS_CCL:
+      setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_ENCODE (str->codesys));
+      break;
+    case CODESYS_UTF8:
+      str->encode_char = &char_encode_utf8;
+      str->finish = &char_finish_utf8;
+    default:
+      break;
+    }
+#endif /* MULE */
+  str->iso2022.current_char_boundary = 0;
+  str->flags = str->ch = 0;
+}
+
+static int
+encoding_rewinder (Lstream *stream)
+{
+  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+  reset_encoding_stream (str);
+  Dynarr_reset (str->runoff);
+  return Lstream_rewind (str->other_end);
+}
+
+static int
+encoding_seekable_p (Lstream *stream)
+{
+  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+  return Lstream_seekable_p (str->other_end);
+}
+
+static int
+encoding_flusher (Lstream *stream)
+{
+  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+  return Lstream_flush (str->other_end);
+}
+
+static int
+encoding_closer (Lstream *stream)
+{
+  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+  if (stream->flags & LSTREAM_FL_WRITE)
+    {
+      str->flags |= CODING_STATE_END;
+      encoding_writer (stream, 0, 0);
+    }
+  Dynarr_free (str->runoff);
+  return Lstream_close (str->other_end);
+}
+
+Lisp_Object
+encoding_stream_coding_system (Lstream *stream)
+{
+  Lisp_Object coding_system;
+  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+
+  XSETCODING_SYSTEM (coding_system, str->codesys);
+  return coding_system;
+}
+
+void
+set_encoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
+{
+  Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
+  struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
+  str->codesys = cs;
+  reset_encoding_stream (str);
+}
+
+static Lisp_Object
+make_encoding_stream_1 (Lstream *stream, Lisp_Object codesys,
+                       CONST char *mode)
+{
+  Lstream *lstr = Lstream_new (lstream_encoding, mode);
+  struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
+  Lisp_Object obj;
+
+  xzero (*str);
+  str->runoff = Dynarr_new (unsigned_char);
+  str->other_end = stream;
+  set_encoding_stream_coding_system (lstr, codesys);
+  XSETLSTREAM (obj, lstr);
+  return obj;
+}
+
+Lisp_Object
+make_encoding_input_stream (Lstream *stream, Lisp_Object codesys)
+{
+  return make_encoding_stream_1 (stream, codesys, "r");
+}
+
+Lisp_Object
+make_encoding_output_stream (Lstream *stream, Lisp_Object codesys)
+{
+  return make_encoding_stream_1 (stream, codesys, "w");
+}
+
+/* Convert N bytes of internally-formatted data stored in SRC to an
+   external format, according to the encoding stream ENCODING.
+   Store the encoded data into DST. */
+
+static void
+mule_encode (Lstream *encoding, CONST unsigned char *src,
+            unsigned_char_dynarr *dst, unsigned int n)
+{
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+
+  switch (CODING_SYSTEM_TYPE (str->codesys))
+    {
+#ifdef DEBUG_XEMACS
+    case CODESYS_INTERNAL:
+      Dynarr_add_many (dst, src, n);
+      break;
+#endif
+    case CODESYS_AUTODETECT:
+      /* If we got this far and still haven't decided on the coding
+        system, then do no conversion. */
+    case CODESYS_NO_CONVERSION:
+      encode_coding_no_conversion (encoding, src, dst, n);
+      break;
+#ifdef MULE
+    case CODESYS_SHIFT_JIS:
+      encode_coding_sjis (encoding, src, dst, n);
+      break;
+    case CODESYS_BIG5:
+      encode_coding_big5 (encoding, src, dst, n);
+      break;
+    case CODESYS_UCS4:
+      encode_coding_ucs4 (encoding, src, dst, n);
+      break;
+    case CODESYS_CCL:
+      str->ccl.last_block = str->flags & CODING_STATE_END;
+      ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_ENCODING);
+      break;
+#endif /* MULE */
+    default:
+      text_encode_generic (encoding, src, dst, n);
+    }
+}
+
+DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, 0, /*
+Encode the text between START and END using CODING-SYSTEM.
+This will, for example, convert Japanese characters into stuff such as
+"^[$B!<!+^[(B" if you use the JIS encoding.  Return length of encoded
+text.  BUFFER defaults to the current buffer if unspecified.
+*/
+       (start, end, coding_system, buffer))
+{
+  Bufpos b, e;
+  struct buffer *buf = decode_buffer (buffer, 0);
+  Lisp_Object instream, lb_outstream, de_outstream, outstream;
+  Lstream *istr, *ostr;
+  struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;
+
+  get_buffer_range_char (buf, start, end, &b, &e, 0);
+
+  barf_if_buffer_read_only (buf, b, e);
+
+  coding_system = Fget_coding_system (coding_system);
+  instream  = make_lisp_buffer_input_stream  (buf, b, e, 0);
+  lb_outstream = make_lisp_buffer_output_stream (buf, b, 0);
+  de_outstream = make_decoding_output_stream (XLSTREAM (lb_outstream),
+                                             Fget_coding_system (Qbinary));
+  outstream = make_encoding_output_stream (XLSTREAM (de_outstream),
+                                          coding_system);
+  istr = XLSTREAM (instream);
+  ostr = XLSTREAM (outstream);
+  GCPRO4 (instream, outstream, de_outstream, lb_outstream);
+  /* The chain of streams looks like this:
+
+     [BUFFER] <----- send through
+                     ------> [ENCODE AS SPECIFIED]
+                            ------> [DECODE AS BINARY]
+                                    ------> [BUFFER]
+   */
+  while (1)
+    {
+      char tempbuf[1024]; /* some random amount */
+      Bufpos newpos, even_newer_pos;
+      Bufpos oldpos = lisp_buffer_stream_startpos (istr);
+      int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
+
+      if (!size_in_bytes)
+       break;
+      newpos = lisp_buffer_stream_startpos (istr);
+      Lstream_write (ostr, tempbuf, size_in_bytes);
+      even_newer_pos = lisp_buffer_stream_startpos (istr);
+      buffer_delete_range (buf, even_newer_pos - (newpos - oldpos),
+                          even_newer_pos, 0);
+    }
+
+  {
+    Charcount retlen =
+      lisp_buffer_stream_startpos (XLSTREAM (instream)) - b;
+    Lstream_close (istr);
+    Lstream_close (ostr);
+    UNGCPRO;
+    Lstream_delete (istr);
+    Lstream_delete (ostr);
+    Lstream_delete (XLSTREAM (de_outstream));
+    Lstream_delete (XLSTREAM (lb_outstream));
+    return make_int (retlen);
+  }
+}
+
+#ifdef MULE
+\f
+static void
+text_encode_generic (Lstream *encoding, CONST unsigned char *src,
+                    unsigned_char_dynarr *dst, unsigned int n)
+{
+  unsigned char c;
+  unsigned char char_boundary;
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+  unsigned int flags          = str->flags;
+  Emchar ch                   = str->ch;
+  Lisp_Object charset;
+  int half;
+
+  char_boundary = str->iso2022.current_char_boundary;
+  charset = str->iso2022.current_charset;
+  half = str->iso2022.current_half;
+
+  while (n--)
+    {
+      c = *src++;
+
+      switch (char_boundary)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             char_boundary = 1;
+           }
+         else
+           {
+             (*str->encode_char) (str, c, dst, &flags);
+             ch = 0;
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         (*str->encode_char) (str, (ch << 6) | (c & 0x3f), dst, &flags);
+         ch =0;
+         char_boundary = 0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         char_boundary--;
+       }
+    }
+
+  if ( (char_boundary == 0) && flags & CODING_STATE_END)
+    {
+      (*str->finish) (str, dst, &flags);
+    }
+
+  str->flags = flags;
+  str->ch    = ch;
+  str->iso2022.current_char_boundary = char_boundary;
+  str->iso2022.current_charset = charset;
+  str->iso2022.current_half = half;
+
+  /* Verbum caro factum est! */
+}
+
+\f
+/************************************************************************/
+/*                          Shift-JIS methods                           */
+/************************************************************************/
+
+/* Shift-JIS is a coding system encoding three character sets: ASCII, right
+   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
+   as is.  A character of JISX0201-Kana (DIMENSION1_CHARS94 character set) is
+   encoded by "position-code + 0x80".  A character of JISX0208
+   (DIMENSION2_CHARS94 character set) is encoded in 2-byte but two
+   position-codes are divided and shifted so that it fit in the range
+   below.
+
+   --- CODE RANGE of Shift-JIS ---
+   (character set)     (range)
+   ASCII               0x00 .. 0x7F
+   JISX0201-Kana       0xA0 .. 0xDF
+   JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xEF
+           (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
+   -------------------------------
+
+*/
+
+/* Is this the first byte of a Shift-JIS two-byte char? */
+
+#define BYTE_SJIS_TWO_BYTE_1_P(c) \
+  (((c) >= 0x81 && (c) <= 0x9F) || ((c) >= 0xE0 && (c) <= 0xEF))
+
+/* Is this the second byte of a Shift-JIS two-byte char? */
+
+#define BYTE_SJIS_TWO_BYTE_2_P(c) \
+  (((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0x80 && (c) <= 0xFC))
+
+#define BYTE_SJIS_KATAKANA_P(c)        \
+  ((c) >= 0xA1 && (c) <= 0xDF)
+
+static int
+detect_coding_sjis (struct detection_state *st, CONST unsigned char *src,
+                   unsigned int n)
+{
+  int c;
+
+  while (n--)
+    {
+      c = *src++;
+      if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+       return 0;
+      if (st->shift_jis.in_second_byte)
+       {
+         st->shift_jis.in_second_byte = 0;
+         if (c < 0x40)
+           return 0;
+       }
+      else if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
+       st->shift_jis.in_second_byte = 1;
+    }
+  return CODING_CATEGORY_SHIFT_JIS_MASK;
+}
+
+/* Convert Shift-JIS data to internal format. */
+
+static void
+decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+  unsigned char c;
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
+
+  while (n--)
+    {
+      c = *src++;
+
+      if (ch)
+       {
+         /* Previous character was first byte of Shift-JIS Kanji char. */
+         if (BYTE_SJIS_TWO_BYTE_2_P (c))
+           {
+             unsigned char e1, e2;
+
+             DECODE_SJIS (ch, c, e1, e2);
+#ifdef UTF2000
+             DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_japanese_jisx0208,
+                                           e1 & 0x7F,
+                                           e2 & 0x7F), dst);
+#else
+             Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
+             Dynarr_add (dst, e1);
+             Dynarr_add (dst, e2);
+#endif
+           }
+         else
+           {
+             DECODE_ADD_BINARY_CHAR (ch, dst);
+             DECODE_ADD_BINARY_CHAR (c, dst);
+           }
+         ch = 0;
+       }
+      else
+       {
+         DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+         if (BYTE_SJIS_TWO_BYTE_1_P (c))
+           ch = c;
+         else if (BYTE_SJIS_KATAKANA_P (c))
+           {
+#ifdef UTF2000
+             DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_katakana_jisx0201,
+                                           c & 0x7F, 0), dst);
+#else
+             Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201);
+             Dynarr_add (dst, c);
+#endif
+           }
+         else
+           DECODE_ADD_BINARY_CHAR (c, dst);
+       }
+    label_continue_loop:;
+    }
+
+  DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+
+  str->flags = flags;
+  str->ch    = ch;
+}
+
+/* Convert internally-formatted data to Shift-JIS. */
+
+static void
+encode_coding_sjis (Lstream *encoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+  unsigned char c;
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+#ifdef UTF2000
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#endif
+
+  while (n--)
+    {
+      c = *src++;
+#ifdef UTF2000
+      switch (char_boundary)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             char_boundary = 1;
+           }
+         else
+           {
+             ch = 0;
+             if (c == '\n')
+               {
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               Dynarr_add (dst, c);
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         {
+           Lisp_Object charset;
+           unsigned int c1, c2, s1, s2;
+           
+           BREAKUP_CHAR (ch, charset, c1, c2);
+           if (EQ(charset, Vcharset_katakana_jisx0201))
+             {
+               Dynarr_add (dst, c1 | 0x80);
+             }
+           else if (EQ(charset, Vcharset_japanese_jisx0208))
+             {
+               ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
+               Dynarr_add (dst, s1);
+               Dynarr_add (dst, s2);
+             }
+         }
+         char_boundary = 0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         char_boundary--;
+       }
+#else
+      if (c == '\n')
+       {
+         if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+           Dynarr_add (dst, '\r');
+         if (eol_type != EOL_CR)
+           Dynarr_add (dst, '\n');
+         ch = 0;
+       }
+      else if (BYTE_ASCII_P (c))
+       {
+         Dynarr_add (dst, c);
+         ch = 0;
+       }
+      else if (BUFBYTE_LEADING_BYTE_P (c))
+       ch = (c == LEADING_BYTE_KATAKANA_JISX0201 ||
+             c == LEADING_BYTE_JAPANESE_JISX0208_1978 ||
+             c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0;
+      else if (ch)
+       {
+         if (ch == LEADING_BYTE_KATAKANA_JISX0201)
+           {
+             Dynarr_add (dst, c);
+             ch = 0;
+           }
+         else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 ||
+                  ch == LEADING_BYTE_JAPANESE_JISX0208)
+           ch = c;
+         else
+           {
+             unsigned char j1, j2;
+             ENCODE_SJIS (ch, c, j1, j2);
+             Dynarr_add (dst, j1);
+             Dynarr_add (dst, j2);
+             ch = 0;
+           }
+       }
+#endif
+    }
+
+  str->flags = flags;
+  str->ch    = ch;
+#ifdef UTF2000
+  str->iso2022.current_char_boundary = char_boundary;
+#endif
+}
+
+DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /*
+Decode a JISX0208 character of Shift-JIS coding-system.
+CODE is the character code in Shift-JIS as a cons of type bytes.
+Return the corresponding character.
+*/
+       (code))
+{
+  unsigned char c1, c2, s1, s2;
+
+  CHECK_CONS (code);
+  CHECK_INT (XCAR (code));
+  CHECK_INT (XCDR (code));
+  s1 = XINT (XCAR (code));
+  s2 = XINT (XCDR (code));
+  if (BYTE_SJIS_TWO_BYTE_1_P (s1) &&
+      BYTE_SJIS_TWO_BYTE_2_P (s2))
+    {
+      DECODE_SJIS (s1, s2, c1, c2);
+      return make_char (MAKE_CHAR (Vcharset_japanese_jisx0208,
+                                  c1 & 0x7F, c2 & 0x7F));
+    }
+  else
+    return Qnil;
+}
+
+DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /*
+Encode a JISX0208 character CHAR to SHIFT-JIS coding-system.
+Return the corresponding character code in SHIFT-JIS as a cons of two bytes.
+*/
+       (ch))
+{
+  Lisp_Object charset;
+  int c1, c2, s1, s2;
+
+  CHECK_CHAR_COERCE_INT (ch);
+  BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
+  if (EQ (charset, Vcharset_japanese_jisx0208))
+    {
+      ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
+      return Fcons (make_int (s1), make_int (s2));
+    }
+  else
+    return Qnil;
+}
+
+\f
+/************************************************************************/
+/*                            Big5 methods                              */
+/************************************************************************/
+
+/* BIG5 is a coding system encoding two character sets: ASCII and
+   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
+   character set and is encoded in two-byte.
+
+   --- CODE RANGE of BIG5 ---
+   (character set)     (range)
+   ASCII               0x00 .. 0x7F
+   Big5 (1st byte)     0xA1 .. 0xFE
+       (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
+   --------------------------
+
+   Since the number of characters in Big5 is larger than maximum
+   characters in Emacs' charset (96x96), it can't be handled as one
+   charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
+   and `charset-big5-2'.  Both <type>s are DIMENSION2_CHARS94.  The former
+   contains frequently used characters and the latter contains less
+   frequently used characters.  */
+
+#define BYTE_BIG5_TWO_BYTE_1_P(c) \
+  ((c) >= 0xA1 && (c) <= 0xFE)
+
+/* Is this the second byte of a Shift-JIS two-byte char? */
+
+#define BYTE_BIG5_TWO_BYTE_2_P(c) \
+  (((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0xA1 && (c) <= 0xFE))
+
+/* Number of Big5 characters which have the same code in 1st byte.  */
+
+#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
+
+/* Code conversion macros.  These are macros because they are used in
+   inner loops during code conversion.
+
+   Note that temporary variables in macros introduce the classic
+   dynamic-scoping problems with variable names.  We use capital-
+   lettered variables in the assumption that XEmacs does not use
+   capital letters in variables except in a very formalized way
+   (e.g. Qstring). */
+
+/* Convert Big5 code (b1, b2) into its internal string representation
+   (lb, c1, c2). */
+
+/* There is a much simpler way to split the Big5 charset into two.
+   For the moment I'm going to leave the algorithm as-is because it
+   claims to separate out the most-used characters into a single
+   charset, which perhaps will lead to optimizations in various
+   places.
+
+   The way the algorithm works is something like this:
+
+   Big5 can be viewed as a 94x157 charset, where the row is
+   encoded into the bytes 0xA1 .. 0xFE and the column is encoded
+   into the bytes 0x40 .. 0x7E and 0xA1 .. 0xFE.  As for frequency,
+   the split between low and high column numbers is apparently
+   meaningless; ascending rows produce less and less frequent chars.
+   Therefore, we assign the lower half of rows (0xA1 .. 0xC8) to
+   the first charset, and the upper half (0xC9 .. 0xFE) to the
+   second.  To do the conversion, we convert the character into
+   a single number where 0 .. 156 is the first row, 157 .. 313
+   is the second, etc.  That way, the characters are ordered by
+   decreasing frequency.  Then we just chop the space in two
+   and coerce the result into a 94x94 space.
+   */
+
+#define DECODE_BIG5(b1, b2, lb, c1, c2) do                             \
+{                                                                      \
+  int B1 = b1, B2 = b2;                                                        \
+  unsigned int I                                                       \
+    = (B1 - 0xA1) * BIG5_SAME_ROW + B2 - (B2 < 0x7F ? 0x40 : 0x62);    \
+                                                                       \
+  if (B1 < 0xC9)                                                       \
+    {                                                                  \
+      lb = LEADING_BYTE_CHINESE_BIG5_1;                                        \
+    }                                                                  \
+  else                                                                 \
+    {                                                                  \
+      lb = LEADING_BYTE_CHINESE_BIG5_2;                                        \
+      I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);                            \
+    }                                                                  \
+  c1 = I / (0xFF - 0xA1) + 0xA1;                                       \
+  c2 = I % (0xFF - 0xA1) + 0xA1;                                       \
+} while (0)
+
+/* Convert the internal string representation of a Big5 character
+   (lb, c1, c2) into Big5 code (b1, b2). */
+
+#define ENCODE_BIG5(lb, c1, c2, b1, b2) do                             \
+{                                                                      \
+  unsigned int I = ((c1) - 0xA1) * (0xFF - 0xA1) + ((c2) - 0xA1);      \
+                                                                       \
+  if (lb == LEADING_BYTE_CHINESE_BIG5_2)                               \
+    {                                                                  \
+      I += BIG5_SAME_ROW * (0xC9 - 0xA1);                              \
+    }                                                                  \
+  b1 = I / BIG5_SAME_ROW + 0xA1;                                       \
+  b2 = I % BIG5_SAME_ROW;                                              \
+  b2 += b2 < 0x3F ? 0x40 : 0x62;                                       \
+} while (0)
+
+static int
+detect_coding_big5 (struct detection_state *st, CONST unsigned char *src,
+                   unsigned int n)
+{
+  int c;
+
+  while (n--)
+    {
+      c = *src++;
+      if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO ||
+         (c >= 0x80 && c <= 0xA0))
+       return 0;
+      if (st->big5.in_second_byte)
+       {
+         st->big5.in_second_byte = 0;
+         if (c < 0x40 || (c >= 0x80 && c <= 0xA0))
+           return 0;
+       }
+      else if (c >= 0xA1)
+       st->big5.in_second_byte = 1;
+    }
+  return CODING_CATEGORY_BIG5_MASK;
+}
+
+/* Convert Big5 data to internal format. */
+
+static void
+decode_coding_big5 (Lstream *decoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+  unsigned char c;
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
+
+  while (n--)
+    {
+      c = *src++;
+      if (ch)
+       {
+         /* Previous character was first byte of Big5 char. */
+         if (BYTE_BIG5_TWO_BYTE_2_P (c))
+           {
+             unsigned char b1, b2, b3;
+             DECODE_BIG5 (ch, c, b1, b2, b3);
+             Dynarr_add (dst, b1);
+             Dynarr_add (dst, b2);
+             Dynarr_add (dst, b3);
+           }
+         else
+           {
+             DECODE_ADD_BINARY_CHAR (ch, dst);
+             DECODE_ADD_BINARY_CHAR (c, dst);
+           }
+         ch = 0;
+       }
+      else
+       {
+         DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+         if (BYTE_BIG5_TWO_BYTE_1_P (c))
+           ch = c;
+         else
+           DECODE_ADD_BINARY_CHAR (c, dst);
+       }
+    label_continue_loop:;
+    }
+
+  DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+
+  str->flags = flags;
+  str->ch    = ch;
+}
+
+/* Convert internally-formatted data to Big5. */
+
+static void
+encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+#ifndef UTF2000
+  unsigned char c;
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+
+  while (n--)
+    {
+      c = *src++;
+      if (c == '\n')
+       {
+         if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+           Dynarr_add (dst, '\r');
+         if (eol_type != EOL_CR)
+           Dynarr_add (dst, '\n');
+       }
+      else if (BYTE_ASCII_P (c))
+       {
+         /* ASCII. */
+         Dynarr_add (dst, c);
+       }
+      else if (BUFBYTE_LEADING_BYTE_P (c))
+       {
+         if (c == LEADING_BYTE_CHINESE_BIG5_1 ||
+             c == LEADING_BYTE_CHINESE_BIG5_2)
+           {
+             /* A recognized leading byte. */
+             ch = c;
+             continue; /* not done with this character. */
+           }
+         /* otherwise just ignore this character. */
+       }
+      else if (ch == LEADING_BYTE_CHINESE_BIG5_1 ||
+              ch == LEADING_BYTE_CHINESE_BIG5_2)
+       {
+         /* Previous char was a recognized leading byte. */
+         ch = (ch << 8) | c;
+         continue; /* not done with this character. */
+       }
+      else if (ch)
+       {
+         /* Encountering second byte of a Big5 character. */
+         unsigned char b1, b2;
+
+         ENCODE_BIG5 (ch >> 8, ch & 0xFF, c, b1, b2);
+         Dynarr_add (dst, b1);
+         Dynarr_add (dst, b2);
+       }
+
+      ch = 0;
+    }
+
+  str->flags = flags;
+  str->ch    = ch;
+#endif
+}
+
+
+DEFUN ("decode-big5-char", Fdecode_big5_char, 1, 1, 0, /*
+Decode a Big5 character CODE of BIG5 coding-system.
+CODE is the character code in BIG5, a cons of two integers.
+Return the corresponding character.
+*/
+       (code))
+{
+  unsigned char c1, c2, b1, b2;
+
+  CHECK_CONS (code);
+  CHECK_INT (XCAR (code));
+  CHECK_INT (XCDR (code));
+  b1 = XINT (XCAR (code));
+  b2 = XINT (XCDR (code));
+  if (BYTE_BIG5_TWO_BYTE_1_P (b1) &&
+      BYTE_BIG5_TWO_BYTE_2_P (b2))
+    {
+      Charset_ID leading_byte;
+      Lisp_Object charset;
+      DECODE_BIG5 (b1, b2, leading_byte, c1, c2);
+      charset = CHARSET_BY_LEADING_BYTE (leading_byte);
+      return make_char (MAKE_CHAR (charset, c1 & 0x7F, c2 & 0x7F));
+    }
+  else
+    return Qnil;
+}
+
+DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /*
+Encode the Big5 character CH to BIG5 coding-system.
+Return the corresponding character code in Big5.
+*/
+       (ch))
+{
+  Lisp_Object charset;
+  int c1, c2, b1, b2;
+
+  CHECK_CHAR_COERCE_INT (ch);
+  BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
+  if (EQ (charset, Vcharset_chinese_big5_1) ||
+      EQ (charset, Vcharset_chinese_big5_2))
+    {
+      ENCODE_BIG5 (XCHARSET_LEADING_BYTE (charset), c1 | 0x80, c2 | 0x80,
+                  b1, b2);
+      return Fcons (make_int (b1), make_int (b2));
+    }
+  else
+    return Qnil;
+}
+
+\f
+/************************************************************************/
+/*                           UCS-4 methods                              */
+/*                                                                      */
+/*  UCS-4 character codes are implemented as nonnegative integers.      */
+/*                                                                      */
+/************************************************************************/
+
+Lisp_Object ucs_to_mule_table[65536];
+Lisp_Object mule_to_ucs_table;
+
+DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /*
+Map UCS-4 code CODE to Mule character CHARACTER.
+
+Return T on success, NIL on failure.
+*/
+       (code, character))
+{
+  unsigned int c;
+
+  CHECK_CHAR (character);
+  CHECK_INT (code);
+  c = XINT (code);
+
+  if (c < sizeof (ucs_to_mule_table))
+    {
+      ucs_to_mule_table[c] = character;
+      return Qt;
+    }
+  else
+    return Qnil;
+}
+
+static Lisp_Object
+ucs_to_char (unsigned long code)
+{
+  if (code < sizeof (ucs_to_mule_table))
+    {
+      return ucs_to_mule_table[code];
+    }
+  else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14))
+    {
+      unsigned int c;
+
+      code -= 0xe00000;
+      c = code % (94 * 94);
+      return make_char
+       (MAKE_CHAR (CHARSET_BY_ATTRIBUTES
+                   (CHARSET_TYPE_94X94, code / (94 * 94) + '@',
+                    CHARSET_LEFT_TO_RIGHT),
+                   c / 94 + 33, c % 94 + 33));
+    }
+  else
+    return Qnil;
+}
+
+DEFUN ("ucs-char", Fucs_char, 1, 1, 0, /*
+Return Mule character corresponding to UCS code CODE (a positive integer).
+*/
+       (code))
+{
+  CHECK_NATNUM (code);
+  return ucs_to_char (XINT (code));
+}
+
+DEFUN ("set-char-ucs", Fset_char_ucs, 2, 2, 0, /*
+Map Mule character CHARACTER to UCS code CODE (a positive integer).
+*/
+       (character, code))
+{
+  /* #### Isn't this gilding the lily?  Fput_char_table checks its args.
+          Fset_char_ucs is more restrictive on index arg, but should
+          check code arg in a char_table method. */
+  CHECK_CHAR (character);
+  CHECK_NATNUM (code);
+  return Fput_char_table (character, code, mule_to_ucs_table);
+}
+
+DEFUN ("char-ucs", Fchar_ucs, 1, 1, 0, /*
+Return the UCS code (a positive integer) corresponding to CHARACTER.
+*/
+       (character))
+{
+  return Fget_char_table (character, mule_to_ucs_table);
+}
+
+#ifdef UTF2000
+#define decode_ucs4 DECODE_ADD_UCS_CHAR
+#else
+/* Decode a UCS-4 character into a buffer.  If the lookup fails, use
+   <GETA MARK> (U+3013) of JIS X 0208, which means correct character
+   is not found, instead.
+   #### do something more appropriate (use blob?)
+        Danger, Will Robinson!  Data loss.  Should we signal user? */
+static void
+decode_ucs4 (unsigned long ch, unsigned_char_dynarr *dst)
+{
+  Lisp_Object chr = ucs_to_char (ch);
+
+  if (! NILP (chr))
+    {
+      Bufbyte work[MAX_EMCHAR_LEN];
+      int len;
+
+      ch = XCHAR (chr);
+      len = (ch < 128) ?
+       simple_set_charptr_emchar (work, ch) :
+       non_ascii_set_charptr_emchar (work, ch);
+      Dynarr_add_many (dst, work, len);
+    }
+  else
+    {
+      Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
+      Dynarr_add (dst, 34 + 128);
+      Dynarr_add (dst, 46 + 128);
+    }
+}
+#endif
+
+static unsigned long
+mule_char_to_ucs4 (Lisp_Object charset,
+                  unsigned char h, unsigned char l)
+{
+  Lisp_Object code
+    = Fget_char_table (make_char (MAKE_CHAR (charset, h & 127, l & 127)),
+                      mule_to_ucs_table);
+
+  if (INTP (code))
+    {
+      return XINT (code);
+    }
+  else if ( (XCHARSET_DIMENSION (charset) == 2) &&
+           (XCHARSET_CHARS (charset) == 94) )
+    {
+      unsigned char final = XCHARSET_FINAL (charset);
+
+      if ( ('@' <= final) && (final < 0x7f) )
+       {
+         return 0xe00000 + (final - '@') * 94 * 94
+           + ((h & 127) - 33) * 94 + (l & 127) - 33;
+       }
+      else
+       {
+         return '?';
+       }
+    }
+  else
+    {
+      return '?';
+    }
+}
+
+static void
+encode_ucs4 (Lisp_Object charset,
+            unsigned char h, unsigned char l, unsigned_char_dynarr *dst)
+{
+  unsigned long code = mule_char_to_ucs4 (charset, h, l);
+  Dynarr_add (dst,  code >> 24);
+  Dynarr_add (dst, (code >> 16) & 255);
+  Dynarr_add (dst, (code >>  8) & 255);
+  Dynarr_add (dst,  code        & 255);
+}
+
+static int
+detect_coding_ucs4 (struct detection_state *st, CONST unsigned char *src,
+                   unsigned int n)
+{
+  while (n--)
+    {
+      int c = *src++;
+      switch (st->ucs4.in_byte)
+       {
+       case 0:
+         if (c >= 128)
+           return 0;
+         else
+           st->ucs4.in_byte++;
+         break;
+       case 3:
+         st->ucs4.in_byte = 0;
+         break;
+       default:
+         st->ucs4.in_byte++;
+       }
+    }
+  return CODING_CATEGORY_UCS4_MASK;
+}
+
+static void
+decode_coding_ucs4 (Lstream *decoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags = str->flags;
+  unsigned int ch    = str->ch;
+  unsigned char counter = str->counter;
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+      switch (counter)
+       {
+       case 0:
+         ch = c;
+         counter = 3;
+         break;
+       case 1:
+         decode_ucs4 ( ( ch << 8 ) | c, dst);
+         ch = 0;
+         counter = 0;
+         break;
+       default:
+         ch = ( ch << 8 ) | c;
+         counter--;
+       }
+    }
+  if (counter & CODING_STATE_END)
+    DECODE_OUTPUT_PARTIAL_CHAR (ch);
+
+  str->flags = flags;
+  str->ch    = ch;
+  str->counter = counter;
+}
+
+static void
+encode_coding_ucs4 (Lstream *encoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+#ifndef UTF2000
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+  unsigned int flags = str->flags;
+  unsigned int ch = str->ch;
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+  Lisp_Object charset = str->iso2022.current_charset;
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  /* flags for handling composite chars.  We do a little switcharoo
+     on the source while we're outputting the composite char. */
+  unsigned int saved_n = 0;
+  CONST unsigned char *saved_src = NULL;
+  int in_composite = 0;
+
+ back_to_square_n:
+#endif
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+
+      if (BYTE_ASCII_P (c))
+       {               /* Processing ASCII character */
+         ch = 0;
+         encode_ucs4 (Vcharset_ascii, c, 0, dst);
+         char_boundary = 1;
+       }
+      else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
+       { /* Processing Leading Byte */
+         ch = 0;
+         charset = CHARSET_BY_LEADING_BYTE (c);
+         if (LEADING_BYTE_PREFIX_P(c))
+           ch = c;
+         char_boundary = 0;
+       }
+      else
+       {                       /* Processing Non-ASCII character */
+         char_boundary = 1;
+         if (EQ (charset, Vcharset_control_1))
+           {
+             encode_ucs4 (Vcharset_control_1, c, 0, dst);
+           }
+         else
+           {
+             switch (XCHARSET_REP_BYTES (charset))
+               {
+               case 2:
+                 encode_ucs4 (charset, c, 0, dst);
+                 break;
+               case 3:
+                 if (XCHARSET_PRIVATE_P (charset))
+                   {
+                     encode_ucs4 (charset, c, 0, dst);
+                     ch = 0;
+                   }
+                 else if (ch)
+                   {
+#ifdef ENABLE_COMPOSITE_CHARS
+                     if (EQ (charset, Vcharset_composite))
+                       {
+                         if (in_composite)
+                           {
+                             /* #### Bother! We don't know how to
+                                handle this yet. */
+                             Dynarr_add (dst, 0);
+                             Dynarr_add (dst, 0);
+                             Dynarr_add (dst, 0);
+                             Dynarr_add (dst, '~');
+                           }
+                         else
+                           {
+                             Emchar emch = MAKE_CHAR (Vcharset_composite,
+                                                      ch & 0x7F, c & 0x7F);
+                             Lisp_Object lstr = composite_char_string (emch);
+                             saved_n = n;
+                             saved_src = src;
+                             in_composite = 1;
+                             src = XSTRING_DATA   (lstr);
+                             n   = XSTRING_LENGTH (lstr);
+                           }
+                       }
+                     else
+#endif /* ENABLE_COMPOSITE_CHARS */
+                       {
+                         encode_ucs4(charset, ch, c, dst);
+                       }
+                     ch = 0;
+                   }
+                 else
+                   {
+                     ch = c;
+                     char_boundary = 0;
+                   }
+                 break;
+               case 4:
+                 if (ch)
+                   {
+                     encode_ucs4 (charset, ch, c, dst);
+                     ch = 0;
+                   }
+                 else
+                   {
+                     ch = c;
+                     char_boundary = 0;
+                   }
+                 break;
+               default:
+                 abort ();
+               }
+           }
+       }
+    }
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  if (in_composite)
+    {
+      n = saved_n;
+      src = saved_src;
+      in_composite = 0;
+      goto back_to_square_n; /* Wheeeeeeeee ..... */
+    }
+#endif /* ENABLE_COMPOSITE_CHARS */
+
+  str->flags = flags;
+  str->ch = ch;
+  str->iso2022.current_char_boundary = char_boundary;
+  str->iso2022.current_charset = charset;
+
+  /* Verbum caro factum est! */
+#endif
+}
+
+\f
+/************************************************************************/
+/*                           UTF-8 methods                              */
+/************************************************************************/
+
+static int
+detect_coding_utf8 (struct detection_state *st, CONST unsigned char *src,
+                   unsigned int n)
+{
+  while (n--)
+    {
+      unsigned char c = *src++;
+      switch (st->utf8.in_byte)
+       {
+       case 0:
+         if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+           return 0;
+         else if (c >= 0xfc)
+           st->utf8.in_byte = 5;
+         else if (c >= 0xf8)
+           st->utf8.in_byte = 4;
+         else if (c >= 0xf0)
+           st->utf8.in_byte = 3;
+         else if (c >= 0xe0)
+           st->utf8.in_byte = 2;
+         else if (c >= 0xc0)
+           st->utf8.in_byte = 1;
+         else if (c >= 0x80)
+           return 0;
+         break;
+       default:
+         if ((c & 0xc0) != 0x80)
+           return 0;
+         else
+           st->utf8.in_byte--;
+       }
+    }
+  return CODING_CATEGORY_UTF8_MASK;
+}
+
+static void
+decode_coding_utf8 (Lstream *decoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
+  unsigned char counter = str->counter;
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+      switch (counter)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             counter = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             counter = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             counter = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             counter = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             counter = 1;
+           }
+         else
+           {
+             DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+             decode_ucs4 (c, dst);
+           }
+         break;
+       case 1:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         decode_ucs4 (ch, dst);
+         ch = 0;
+         counter = 0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         counter--;
+       }
+    label_continue_loop:;
+    }
+
+  if (flags & CODING_STATE_END)
+    DECODE_OUTPUT_PARTIAL_CHAR (ch);
+
+  str->flags = flags;
+  str->ch    = ch;
+  str->counter = counter;
+}
+
+void
+char_encode_utf8 (struct encoding_stream *str, Emchar code,
+                 unsigned_char_dynarr *dst, unsigned int *flags)
+{
+  if ( code <= 0x7f )
+    {
+      Dynarr_add (dst, code);
+    }
+  else if ( code <= 0x7ff )
+    {
+      Dynarr_add (dst, (code >> 6) | 0xc0);
+      Dynarr_add (dst, (code & 0x3f) | 0x80);
+    }
+  else if ( code <= 0xffff )
+    {
+      Dynarr_add (dst,  (code >> 12) | 0xe0);
+      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+    }
+  else if ( code <= 0x1fffff )
+    {
+      Dynarr_add (dst,  (code >> 18) | 0xf0);
+      Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+    }
+  else if ( code <= 0x3ffffff )
+    {
+      Dynarr_add (dst,  (code >> 24) | 0xf8);
+      Dynarr_add (dst, ((code >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+    }
+  else
+    {
+      Dynarr_add (dst,  (code >> 30) | 0xfc);
+      Dynarr_add (dst, ((code >> 24) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+    }
+}
+
+void
+char_finish_utf8 (struct encoding_stream *str, unsigned_char_dynarr *dst,
+                 unsigned int *flags)
+{
+}
+
+\f
+/************************************************************************/
+/*                           ISO2022 methods                            */
+/************************************************************************/
+
+/* The following note describes the coding system ISO2022 briefly.
+   Since the intention of this note is to help understand the
+   functions in this file, some parts are NOT ACCURATE or OVERLY
+   SIMPLIFIED.  For thorough understanding, please refer to the
+   original document of ISO2022.
+
+   ISO2022 provides many mechanisms to encode several character sets
+   in 7-bit and 8-bit environments.  For 7-bit environments, all text
+   is encoded using bytes less than 128.  This may make the encoded
+   text a little bit longer, but the text passes more easily through
+   several gateways, some of which strip off MSB (Most Signigant Bit).
+
+   There are two kinds of character sets: control character set and
+   graphic character set.  The former contains control characters such
+   as `newline' and `escape' to provide control functions (control
+   functions are also provided by escape sequences).  The latter
+   contains graphic characters such as 'A' and '-'.  Emacs recognizes
+   two control character sets and many graphic character sets.
+
+   Graphic character sets are classified into one of the following
+   four classes, according to the number of bytes (DIMENSION) and
+   number of characters in one dimension (CHARS) of the set:
+   - DIMENSION1_CHARS94
+   - DIMENSION1_CHARS96
+   - DIMENSION2_CHARS94
+   - DIMENSION2_CHARS96
+
+   In addition, each character set is assigned an identification tag,
+   unique for each set, called "final character" (denoted as <F>
+   hereafter).  The <F> of each character set is decided by ECMA(*)
+   when it is registered in ISO.  The code range of <F> is 0x30..0x7F
+   (0x30..0x3F are for private use only).
+
+   Note (*): ECMA = European Computer Manufacturers Association
+
+   Here are examples of graphic character set [NAME(<F>)]:
+       o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
+       o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
+       o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
+       o DIMENSION2_CHARS96 -- none for the moment
+
+   A code area (1 byte = 8 bits) is divided into 4 areas, C0, GL, C1, and GR.
+       C0 [0x00..0x1F] -- control character plane 0
+       GL [0x20..0x7F] -- graphic character plane 0
+       C1 [0x80..0x9F] -- control character plane 1
+       GR [0xA0..0xFF] -- graphic character plane 1
+
+   A control character set is directly designated and invoked to C0 or
+   C1 by an escape sequence.  The most common case is that:
+   - ISO646's  control character set is designated/invoked to C0, and
+   - ISO6429's control character set is designated/invoked to C1,
+   and usually these designations/invocations are omitted in encoded
+   text.  In a 7-bit environment, only C0 can be used, and a control
+   character for C1 is encoded by an appropriate escape sequence to
+   fit into the environment.  All control characters for C1 are
+   defined to have corresponding escape sequences.
+
+   A graphic character set is at first designated to one of four
+   graphic registers (G0 through G3), then these graphic registers are
+   invoked to GL or GR.  These designations and invocations can be
+   done independently.  The most common case is that G0 is invoked to
+   GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
+   these invocations and designations are omitted in encoded text.
+   In a 7-bit environment, only GL can be used.
+
+   When a graphic character set of CHARS94 is invoked to GL, codes
+   0x20 and 0x7F of the GL area work as control characters SPACE and
+   DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
+   be used.
+
+   There are two ways of invocation: locking-shift and single-shift.
+   With locking-shift, the invocation lasts until the next different
+   invocation, whereas with single-shift, the invocation affects the
+   following character only and doesn't affect the locking-shift
+   state.  Invocations are done by the following control characters or
+   escape sequences:
+
+   ----------------------------------------------------------------------
+   abbrev  function                 cntrl escape seq   description
+   ----------------------------------------------------------------------
+   SI/LS0  (shift-in)               0x0F  none         invoke G0 into GL
+   SO/LS1  (shift-out)              0x0E  none         invoke G1 into GL
+   LS2     (locking-shift-2)        none  ESC 'n'      invoke G2 into GL
+   LS3     (locking-shift-3)        none  ESC 'o'      invoke G3 into GL
+   LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
+   LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
+   LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
+   SS2     (single-shift-2)         0x8E  ESC 'N'      invoke G2 for one char
+   SS3     (single-shift-3)         0x8F  ESC 'O'      invoke G3 for one char
+   ----------------------------------------------------------------------
+   (*) These are not used by any known coding system.
+
+   Control characters for these functions are defined by macros
+   ISO_CODE_XXX in `coding.h'.
+
+   Designations are done by the following escape sequences:
+   ----------------------------------------------------------------------
+   escape sequence     description
+   ----------------------------------------------------------------------
+   ESC '(' <F>         designate DIMENSION1_CHARS94<F> to G0
+   ESC ')' <F>         designate DIMENSION1_CHARS94<F> to G1
+   ESC '*' <F>         designate DIMENSION1_CHARS94<F> to G2
+   ESC '+' <F>         designate DIMENSION1_CHARS94<F> to G3
+   ESC ',' <F>         designate DIMENSION1_CHARS96<F> to G0 (*)
+   ESC '-' <F>         designate DIMENSION1_CHARS96<F> to G1
+   ESC '.' <F>         designate DIMENSION1_CHARS96<F> to G2
+   ESC '/' <F>         designate DIMENSION1_CHARS96<F> to G3
+   ESC '$' '(' <F>     designate DIMENSION2_CHARS94<F> to G0 (**)
+   ESC '$' ')' <F>     designate DIMENSION2_CHARS94<F> to G1
+   ESC '$' '*' <F>     designate DIMENSION2_CHARS94<F> to G2
+   ESC '$' '+' <F>     designate DIMENSION2_CHARS94<F> to G3
+   ESC '$' ',' <F>     designate DIMENSION2_CHARS96<F> to G0 (*)
+   ESC '$' '-' <F>     designate DIMENSION2_CHARS96<F> to G1
+   ESC '$' '.' <F>     designate DIMENSION2_CHARS96<F> to G2
+   ESC '$' '/' <F>     designate DIMENSION2_CHARS96<F> to G3
+   ----------------------------------------------------------------------
+
+   In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
+   of dimension 1, chars 94, and final character <F>, etc...
+
+   Note (*): Although these designations are not allowed in ISO2022,
+   Emacs accepts them on decoding, and produces them on encoding
+   CHARS96 character sets in a coding system which is characterized as
+   7-bit environment, non-locking-shift, and non-single-shift.
+
+   Note (**): If <F> is '@', 'A', or 'B', the intermediate character
+   '(' can be omitted.  We refer to this as "short-form" hereafter.
+
+   Now you may notice that there are a lot of ways for encoding the
+   same multilingual text in ISO2022.  Actually, there exist many
+   coding systems such as Compound Text (used in X11's inter client
+   communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
+   (used in Korean internet), EUC (Extended UNIX Code, used in Asian
+   localized platforms), and all of these are variants of ISO2022.
+
+   In addition to the above, Emacs handles two more kinds of escape
+   sequences: ISO6429's direction specification and Emacs' private
+   sequence for specifying character composition.
+
+   ISO6429's direction specification takes the following form:
+       o CSI ']'      -- end of the current direction
+       o CSI '0' ']'  -- end of the current direction
+       o CSI '1' ']'  -- start of left-to-right text
+       o CSI '2' ']'  -- start of right-to-left text
+   The control character CSI (0x9B: control sequence introducer) is
+   abbreviated to the escape sequence ESC '[' in a 7-bit environment.
+
+   Character composition specification takes the following form:
+       o ESC '0' -- start character composition
+       o ESC '1' -- end character composition
+   Since these are not standard escape sequences of any ISO standard,
+   their use with these meanings is restricted to Emacs only.  */
+
+static void
+reset_iso2022 (Lisp_Object coding_system, struct iso2022_decoder *iso)
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    {
+      if (!NILP (coding_system))
+       iso->charset[i] =
+         XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, i);
+      else
+       iso->charset[i] = Qt;
+      iso->invalid_designated[i] = 0;
+    }
+  iso->esc = ISO_ESC_NOTHING;
+  iso->esc_bytes_index = 0;
+  iso->register_left = 0;
+  iso->register_right = 1;
+  iso->switched_dir_and_no_valid_charset_yet = 0;
+  iso->invalid_switch_dir = 0;
+  iso->output_direction_sequence = 0;
+  iso->output_literally = 0;
+#ifdef ENABLE_COMPOSITE_CHARS
+  if (iso->composite_chars)
+    Dynarr_reset (iso->composite_chars);
+#endif
+}
+
+static int
+fit_to_be_escape_quoted (unsigned char c)
+{
+  switch (c)
+    {
+    case ISO_CODE_ESC:
+    case ISO_CODE_CSI:
+    case ISO_CODE_SS2:
+    case ISO_CODE_SS3:
+    case ISO_CODE_SO:
+    case ISO_CODE_SI:
+      return 1;
+
+    default:
+      return 0;
+    }
+}
+
+/* Parse one byte of an ISO2022 escape sequence.
+   If the result is an invalid escape sequence, return 0 and
+   do not change anything in STR.  Otherwise, if the result is
+   an incomplete escape sequence, update ISO2022.ESC and
+   ISO2022.ESC_BYTES and return -1.  Otherwise, update
+   all the state variables (but not ISO2022.ESC_BYTES) and
+   return 1.
+
+   If CHECK_INVALID_CHARSETS is non-zero, check for designation
+   or invocation of an invalid character set and treat that as
+   an unrecognized escape sequence. */
+
+static int
+parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
+                  unsigned char c, unsigned int *flags,
+                  int check_invalid_charsets)
+{
+  /* (1) If we're at the end of a designation sequence, CS is the
+     charset being designated and REG is the register to designate
+     it to.
+
+     (2) If we're at the end of a locking-shift sequence, REG is
+     the register to invoke and HALF (0 == left, 1 == right) is
+     the half to invoke it into.
+
+     (3) If we're at the end of a single-shift sequence, REG is
+     the register to invoke. */
+  Lisp_Object cs = Qnil;
+  int reg, half;
+
+  /* NOTE: This code does goto's all over the fucking place.
+     The reason for this is that we're basically implementing
+     a state machine here, and hierarchical languages like C
+     don't really provide a clean way of doing this. */
+
+  if (! (*flags & CODING_STATE_ESCAPE))
+    /* At beginning of escape sequence; we need to reset our
+       escape-state variables. */
+    iso->esc = ISO_ESC_NOTHING;
+
+  iso->output_literally = 0;
+  iso->output_direction_sequence = 0;
+
+  switch (iso->esc)
+    {
+    case ISO_ESC_NOTHING:
+      iso->esc_bytes_index = 0;
+      switch (c)
+       {
+       case ISO_CODE_ESC:      /* Start escape sequence */
+         *flags |= CODING_STATE_ESCAPE;
+         iso->esc = ISO_ESC;
+         goto not_done;
+
+       case ISO_CODE_CSI:      /* ISO6429 (specifying directionality) */
+         *flags |= CODING_STATE_ESCAPE;
+         iso->esc = ISO_ESC_5_11;
+         goto not_done;
+
+       case ISO_CODE_SO:       /* locking shift 1 */
+         reg = 1; half = 0;
+         goto locking_shift;
+       case ISO_CODE_SI:       /* locking shift 0 */
+         reg = 0; half = 0;
+         goto locking_shift;
+
+       case ISO_CODE_SS2:      /* single shift */
+         reg = 2;
+         goto single_shift;
+       case ISO_CODE_SS3:      /* single shift */
+         reg = 3;
+         goto single_shift;
+
+       default:                        /* Other control characters */
+         return 0;
+       }
+
+    case ISO_ESC:
+      switch (c)
+       {
+         /**** single shift ****/
+
+       case 'N':       /* single shift 2 */
+         reg = 2;
+         goto single_shift;
+       case 'O':       /* single shift 3 */
+         reg = 3;
+         goto single_shift;
+
+         /**** locking shift ****/
+
+       case '~':       /* locking shift 1 right */
+         reg = 1; half = 1;
+         goto locking_shift;
+       case 'n':       /* locking shift 2 */
+         reg = 2; half = 0;
+         goto locking_shift;
+       case '}':       /* locking shift 2 right */
+         reg = 2; half = 1;
+         goto locking_shift;
+       case 'o':       /* locking shift 3 */
+         reg = 3; half = 0;
+         goto locking_shift;
+       case '|':       /* locking shift 3 right */
+         reg = 3; half = 1;
+         goto locking_shift;
+
+#ifdef ENABLE_COMPOSITE_CHARS
+         /**** composite ****/
+
+       case '0':
+         iso->esc = ISO_ESC_START_COMPOSITE;
+         *flags = (*flags & CODING_STATE_ISO2022_LOCK) |
+           CODING_STATE_COMPOSITE;
+         return 1;
+
+       case '1':
+         iso->esc = ISO_ESC_END_COMPOSITE;
+         *flags = (*flags & CODING_STATE_ISO2022_LOCK) &
+           ~CODING_STATE_COMPOSITE;
+         return 1;
+#endif /* ENABLE_COMPOSITE_CHARS */
+
+         /**** directionality ****/
+
+       case '[':
+         iso->esc = ISO_ESC_5_11;
+         goto not_done;
+
+         /**** designation ****/
+
+       case '$':       /* multibyte charset prefix */
+         iso->esc = ISO_ESC_2_4;
+         goto not_done;
+
+       default:
+         if (0x28 <= c && c <= 0x2F)
+           {
+             iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_8);
+             goto not_done;
+           }
+
+         /* This function is called with CODESYS equal to nil when
+            doing coding-system detection. */
+         if (!NILP (codesys)
+             && XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+             && fit_to_be_escape_quoted (c))
+           {
+             iso->esc = ISO_ESC_LITERAL;
+             *flags &= CODING_STATE_ISO2022_LOCK;
+             return 1;
+           }
+
+         /* bzzzt! */
+         return 0;
+       }
+
+
+
+      /**** directionality ****/
+
+    case ISO_ESC_5_11:         /* ISO6429 direction control */
+      if (c == ']')
+       {
+         *flags &= (CODING_STATE_ISO2022_LOCK & ~CODING_STATE_R2L);
+         goto directionality;
+       }
+      if      (c == '0') iso->esc = ISO_ESC_5_11_0;
+      else if (c == '1') iso->esc = ISO_ESC_5_11_1;
+      else if (c == '2') iso->esc = ISO_ESC_5_11_2;
+      else               return 0;
+      goto not_done;
+
+    case ISO_ESC_5_11_0:
+      if (c == ']')
+       {
+         *flags &= (CODING_STATE_ISO2022_LOCK & ~CODING_STATE_R2L);
+         goto directionality;
+       }
+      return 0;
+
+    case ISO_ESC_5_11_1:
+      if (c == ']')
+       {
+         *flags = (CODING_STATE_ISO2022_LOCK & ~CODING_STATE_R2L);
+         goto directionality;
+       }
+      return 0;
+
+    case ISO_ESC_5_11_2:
+      if (c == ']')
+       {
+         *flags = (*flags & CODING_STATE_ISO2022_LOCK) | CODING_STATE_R2L;
+         goto directionality;
+       }
+      return 0;
+
+    directionality:
+      iso->esc = ISO_ESC_DIRECTIONALITY;
+      /* Various junk here to attempt to preserve the direction sequences
+        literally in the text if they would otherwise be swallowed due
+        to invalid designations that don't show up as actual charset
+        changes in the text. */
+      if (iso->invalid_switch_dir)
+       {
+         /* We already inserted a direction switch literally into the
+            text.  We assume (#### this may not be right) that the
+            next direction switch is the one going the other way,
+            and we need to output that literally as well. */
+         iso->output_literally = 1;
+         iso->invalid_switch_dir = 0;
+       }
+      else
+       {
+         int jj;
+
+         /* If we are in the thrall of an invalid designation,
+          then stick the directionality sequence literally into the
+          output stream so it ends up in the original text again. */
+         for (jj = 0; jj < 4; jj++)
+           if (iso->invalid_designated[jj])
+             break;
+         if (jj < 4)
+           {
+             iso->output_literally = 1;
+             iso->invalid_switch_dir = 1;
+           }
+         else
+           /* Indicate that we haven't yet seen a valid designation,
+              so that if a switch-dir is directly followed by an
+              invalid designation, both get inserted literally. */
+           iso->switched_dir_and_no_valid_charset_yet = 1;
+       }
+      return 1;
+
+
+      /**** designation ****/
+
+    case ISO_ESC_2_4:
+      if (0x28 <= c && c <= 0x2F)
+       {
+         iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_4_8);
+         goto not_done;
+       }
+      if (0x40 <= c && c <= 0x42)
+       {
+         cs = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, c,
+                                     *flags & CODING_STATE_R2L ?
+                                     CHARSET_RIGHT_TO_LEFT :
+                                     CHARSET_LEFT_TO_RIGHT);
+         reg = 0;
+         goto designated;
+       }
+      return 0;
+
+    default:
+      {
+       int type =-1;
+
+       if (c < '0' || c > '~')
+         return 0; /* bad final byte */
+
+       if (iso->esc >= ISO_ESC_2_8 &&
+           iso->esc <= ISO_ESC_2_15)
+         {
+           type = ((iso->esc >= ISO_ESC_2_12) ?
+                   CHARSET_TYPE_96 : CHARSET_TYPE_94);
+           reg = (iso->esc - ISO_ESC_2_8) & 3;
+         }
+       else if (iso->esc >= ISO_ESC_2_4_8 &&
+                iso->esc <= ISO_ESC_2_4_15)
+         {
+           type = ((iso->esc >= ISO_ESC_2_4_12) ?
+                   CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94);
+           reg = (iso->esc - ISO_ESC_2_4_8) & 3;
+         }
+       else
+         {
+           /* Can this ever be reached? -slb */
+           abort();
+         }
+
+       cs = CHARSET_BY_ATTRIBUTES (type, c,
+                                   *flags & CODING_STATE_R2L ?
+                                   CHARSET_RIGHT_TO_LEFT :
+                                   CHARSET_LEFT_TO_RIGHT);
+       goto designated;
+      }
+    }
+
+ not_done:
+  iso->esc_bytes[iso->esc_bytes_index++] = (unsigned char) c;
+  return -1;
+
+ single_shift:
+  if (check_invalid_charsets && !CHARSETP (iso->charset[reg]))
+    /* can't invoke something that ain't there. */
+    return 0;
+  iso->esc = ISO_ESC_SINGLE_SHIFT;
+  *flags &= CODING_STATE_ISO2022_LOCK;
+  if (reg == 2)
+    *flags |= CODING_STATE_SS2;
+  else
+    *flags |= CODING_STATE_SS3;
+  return 1;
+
+ locking_shift:
+  if (check_invalid_charsets &&
+      !CHARSETP (iso->charset[reg]))
+    /* can't invoke something that ain't there. */
+    return 0;
+  if (half)
+    iso->register_right = reg;
+  else
+    iso->register_left = reg;
+  *flags &= CODING_STATE_ISO2022_LOCK;
+  iso->esc = ISO_ESC_LOCKING_SHIFT;
+  return 1;
+
+ designated:
+  if (NILP (cs) && check_invalid_charsets)
+    {
+      iso->invalid_designated[reg] = 1;
+      iso->charset[reg] = Vcharset_ascii;
+      iso->esc = ISO_ESC_DESIGNATE;
+      *flags &= CODING_STATE_ISO2022_LOCK;
+      iso->output_literally = 1;
+      if (iso->switched_dir_and_no_valid_charset_yet)
+       {
+         /* We encountered a switch-direction followed by an
+            invalid designation.  Ensure that the switch-direction
+            gets outputted; otherwise it will probably get eaten
+            when the text is written out again. */
+         iso->switched_dir_and_no_valid_charset_yet = 0;
+         iso->output_direction_sequence = 1;
+         /* And make sure that the switch-dir going the other
+            way gets outputted, as well. */
+         iso->invalid_switch_dir = 1;
+       }
+      return 1;
+    }
+  /* This function is called with CODESYS equal to nil when
+     doing coding-system detection. */
+  if (!NILP (codesys))
+    {
+      charset_conversion_spec_dynarr *dyn =
+       XCODING_SYSTEM (codesys)->iso2022.input_conv;
+
+      if (dyn)
+       {
+         int i;
+
+         for (i = 0; i < Dynarr_length (dyn); i++)
+           {
+             struct charset_conversion_spec *spec = Dynarr_atp (dyn, i);
+             if (EQ (cs, spec->from_charset))
+               cs = spec->to_charset;
+           }
+       }
+    }
+
+  iso->charset[reg] = cs;
+  iso->esc = ISO_ESC_DESIGNATE;
+  *flags &= CODING_STATE_ISO2022_LOCK;
+  if (iso->invalid_designated[reg])
+    {
+      iso->invalid_designated[reg] = 0;
+      iso->output_literally = 1;
+    }
+  if (iso->switched_dir_and_no_valid_charset_yet)
+    iso->switched_dir_and_no_valid_charset_yet = 0;
+  return 1;
+}
+
+static int
+detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src,
+                      unsigned int n)
+{
+  int mask;
+
+  /* #### There are serious deficiencies in the recognition mechanism
+     here.  This needs to be much smarter if it's going to cut it.
+     The sequence "\xff\x0f" is currently detected as LOCK_SHIFT while
+     it should be detected as Latin-1.
+     All the ISO2022 stuff in this file should be synced up with the
+     code from FSF Emacs-20.4, in which Mule should be more or less stable.
+     Perhaps we should wait till R2L works in FSF Emacs? */
+
+  if (!st->iso2022.initted)
+    {
+      reset_iso2022 (Qnil, &st->iso2022.iso);
+      st->iso2022.mask = (CODING_CATEGORY_ISO_7_MASK |
+                         CODING_CATEGORY_ISO_8_DESIGNATE_MASK |
+                         CODING_CATEGORY_ISO_8_1_MASK |
+                         CODING_CATEGORY_ISO_8_2_MASK |
+                         CODING_CATEGORY_ISO_LOCK_SHIFT_MASK);
+      st->iso2022.flags = 0;
+      st->iso2022.high_byte_count = 0;
+      st->iso2022.saw_single_shift = 0;
+      st->iso2022.initted = 1;
+    }
+
+  mask = st->iso2022.mask;
+
+  while (n--)
+    {
+      int c = *src++;
+      if (c >= 0xA0)
+       {
+         mask &= ~CODING_CATEGORY_ISO_7_MASK;
+         st->iso2022.high_byte_count++;
+       }
+      else
+       {
+         if (st->iso2022.high_byte_count && !st->iso2022.saw_single_shift)
+           {
+             if (st->iso2022.high_byte_count & 1)
+               /* odd number of high bytes; assume not iso-8-2 */
+               mask &= ~CODING_CATEGORY_ISO_8_2_MASK;
+           }
+         st->iso2022.high_byte_count = 0;
+         st->iso2022.saw_single_shift = 0;
+         if (c > 0x80)
+           mask &= ~CODING_CATEGORY_ISO_7_MASK;
+       }
+      if (!(st->iso2022.flags & CODING_STATE_ESCAPE)
+         && (BYTE_C0_P (c) || BYTE_C1_P (c)))
+       { /* control chars */
+         switch (c)
+           {
+             /* Allow and ignore control characters that you might
+                reasonably see in a text file */
+           case '\r':
+           case '\n':
+           case '\t':
+           case  7: /* bell */
+           case  8: /* backspace */
+           case 11: /* vertical tab */
+           case 12: /* form feed */
+           case 26: /* MS-DOS C-z junk */
+           case 31: /* '^_' -- for info */
+             goto label_continue_loop;
+
+           default:
+             break;
+           }
+       }
+
+      if ((st->iso2022.flags & CODING_STATE_ESCAPE) || BYTE_C0_P (c)
+          || BYTE_C1_P (c))
+       {
+         if (parse_iso2022_esc (Qnil, &st->iso2022.iso, c,
+                                &st->iso2022.flags, 0))
+           {
+             switch (st->iso2022.iso.esc)
+               {
+               case ISO_ESC_DESIGNATE:
+                 mask &= ~CODING_CATEGORY_ISO_8_1_MASK;
+                 mask &= ~CODING_CATEGORY_ISO_8_2_MASK;
+                 break;
+               case ISO_ESC_LOCKING_SHIFT:
+                 mask = CODING_CATEGORY_ISO_LOCK_SHIFT_MASK;
+                 goto ran_out_of_chars;
+               case ISO_ESC_SINGLE_SHIFT:
+                 mask &= ~CODING_CATEGORY_ISO_8_DESIGNATE_MASK;
+                 st->iso2022.saw_single_shift = 1;
+                 break;
+               default:
+                 break;
+               }
+           }
+         else
+           {
+             mask = 0;
+             goto ran_out_of_chars;
+           }
+       }
+    label_continue_loop:;
+    }
+
+ ran_out_of_chars:
+
+  return mask;
+}
+
+static int
+postprocess_iso2022_mask (int mask)
+{
+  /* #### kind of cheesy */
+  /* If seven-bit ISO is allowed, then assume that the encoding is
+     entirely seven-bit and turn off the eight-bit ones. */
+  if (mask & CODING_CATEGORY_ISO_7_MASK)
+    mask &= ~ (CODING_CATEGORY_ISO_8_DESIGNATE_MASK |
+              CODING_CATEGORY_ISO_8_1_MASK |
+              CODING_CATEGORY_ISO_8_2_MASK);
+  return mask;
+}
+
+/* If FLAGS is a null pointer or specifies right-to-left motion,
+   output a switch-dir-to-left-to-right sequence to DST.
+   Also update FLAGS if it is not a null pointer.
+   If INTERNAL_P is set, we are outputting in internal format and
+   need to handle the CSI differently. */
+
+static void
+restore_left_to_right_direction (Lisp_Coding_System *codesys,
+                                unsigned_char_dynarr *dst,
+                                unsigned int *flags,
+                                int internal_p)
+{
+  if (!flags || (*flags & CODING_STATE_R2L))
+    {
+      if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
+       {
+         Dynarr_add (dst, ISO_CODE_ESC);
+         Dynarr_add (dst, '[');
+       }
+      else if (internal_p)
+       DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst);
+      else
+       Dynarr_add (dst, ISO_CODE_CSI);
+      Dynarr_add (dst, '0');
+      Dynarr_add (dst, ']');
+      if (flags)
+       *flags &= ~CODING_STATE_R2L;
+    }
+}
+
+/* If FLAGS is a null pointer or specifies a direction different from
+   DIRECTION (which should be either CHARSET_RIGHT_TO_LEFT or
+   CHARSET_LEFT_TO_RIGHT), output the appropriate switch-dir escape
+   sequence to DST.  Also update FLAGS if it is not a null pointer.
+   If INTERNAL_P is set, we are outputting in internal format and
+   need to handle the CSI differently. */
+
+static void
+ensure_correct_direction (int direction, Lisp_Coding_System *codesys,
+                         unsigned_char_dynarr *dst, unsigned int *flags,
+                         int internal_p)
+{
+  if ((!flags || (*flags & CODING_STATE_R2L)) &&
+      direction == CHARSET_LEFT_TO_RIGHT)
+    restore_left_to_right_direction (codesys, dst, flags, internal_p);
+  else if (!CODING_SYSTEM_ISO2022_NO_ISO6429 (codesys)
+          && (!flags || !(*flags & CODING_STATE_R2L)) &&
+          direction == CHARSET_RIGHT_TO_LEFT)
+    {
+      if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
+       {
+         Dynarr_add (dst, ISO_CODE_ESC);
+         Dynarr_add (dst, '[');
+       }
+      else if (internal_p)
+       DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst);
+      else
+       Dynarr_add (dst, ISO_CODE_CSI);
+      Dynarr_add (dst, '2');
+      Dynarr_add (dst, ']');
+      if (flags)
+       *flags |= CODING_STATE_R2L;
+    }
+}
+
+/* Convert ISO2022-format data to internal format. */
+
+static void
+decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
+                      unsigned_char_dynarr *dst, unsigned int n)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
+#ifdef ENABLE_COMPOSITE_CHARS
+  unsigned_char_dynarr *real_dst = dst;
+#endif
+  Lisp_Object coding_system;
+
+  XSETCODING_SYSTEM (coding_system, str->codesys);
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  if (flags & CODING_STATE_COMPOSITE)
+    dst = str->iso2022.composite_chars;
+#endif /* ENABLE_COMPOSITE_CHARS */
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+      if (flags & CODING_STATE_ESCAPE)
+       {       /* Within ESC sequence */
+         int retval = parse_iso2022_esc (coding_system, &str->iso2022,
+                                         c, &flags, 1);
+
+         if (retval)
+           {
+             switch (str->iso2022.esc)
+               {
+#ifdef ENABLE_COMPOSITE_CHARS
+               case ISO_ESC_START_COMPOSITE:
+                 if (str->iso2022.composite_chars)
+                   Dynarr_reset (str->iso2022.composite_chars);
+                 else
+                   str->iso2022.composite_chars = Dynarr_new (unsigned_char);
+                 dst = str->iso2022.composite_chars;
+                 break;
+               case ISO_ESC_END_COMPOSITE:
+                 {
+                   Bufbyte comstr[MAX_EMCHAR_LEN];
+                   Bytecount len;
+                   Emchar emch = lookup_composite_char (Dynarr_atp (dst, 0),
+                                                        Dynarr_length (dst));
+                   dst = real_dst;
+                   len = set_charptr_emchar (comstr, emch);
+                   Dynarr_add_many (dst, comstr, len);
+                   break;
+                 }
+#endif /* ENABLE_COMPOSITE_CHARS */
+
+               case ISO_ESC_LITERAL:
+                 DECODE_ADD_BINARY_CHAR (c, dst);
+                 break;
+
+               default:
+                 /* Everything else handled already */
+                 break;
+               }
+           }
+
+         /* Attempted error recovery. */
+         if (str->iso2022.output_direction_sequence)
+           ensure_correct_direction (flags & CODING_STATE_R2L ?
+                                     CHARSET_RIGHT_TO_LEFT :
+                                     CHARSET_LEFT_TO_RIGHT,
+                                     str->codesys, dst, 0, 1);
+         /* More error recovery. */
+         if (!retval || str->iso2022.output_literally)
+           {
+             /* Output the (possibly invalid) sequence */
+             int i;
+             for (i = 0; i < str->iso2022.esc_bytes_index; i++)
+               DECODE_ADD_BINARY_CHAR (str->iso2022.esc_bytes[i], dst);
+             flags &= CODING_STATE_ISO2022_LOCK;
+             if (!retval)
+               n++, src--;/* Repeat the loop with the same character. */
+             else
+               {
+                 /* No sense in reprocessing the final byte of the
+                    escape sequence; it could mess things up anyway.
+                    Just add it now. */
+                 DECODE_ADD_BINARY_CHAR (c, dst);
+               }
+           }
+         ch = 0;
+       }
+      else if (BYTE_C0_P (c) || BYTE_C1_P (c))
+       { /* Control characters */
+
+         /***** Error-handling *****/
+
+         /* If we were in the middle of a character, dump out the
+            partial character. */
+         DECODE_OUTPUT_PARTIAL_CHAR (ch);
+
+         /* If we just saw a single-shift character, dump it out.
+            This may dump out the wrong sort of single-shift character,
+            but least it will give an indication that something went
+            wrong. */
+         if (flags & CODING_STATE_SS2)
+           {
+             DECODE_ADD_BINARY_CHAR (ISO_CODE_SS2, dst);
+             flags &= ~CODING_STATE_SS2;
+           }
+         if (flags & CODING_STATE_SS3)
+           {
+             DECODE_ADD_BINARY_CHAR (ISO_CODE_SS3, dst);
+             flags &= ~CODING_STATE_SS3;
+           }
+
+         /***** Now handle the control characters. *****/
+
+         /* Handle CR/LF */
+         DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+
+         flags &= CODING_STATE_ISO2022_LOCK;
+
+         if (!parse_iso2022_esc (coding_system, &str->iso2022, c, &flags, 1))
+           DECODE_ADD_BINARY_CHAR (c, dst);
+       }
+      else
+       {                       /* Graphic characters */
+         Lisp_Object charset;
+#ifndef UTF2000
+         Charset_ID lb;
+#endif
+         int reg;
+
+         DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+
+         /* Now determine the charset. */
+         reg = ((flags & CODING_STATE_SS2) ? 2
+                : (flags & CODING_STATE_SS3) ? 3
+                : !BYTE_ASCII_P (c) ? str->iso2022.register_right
+                : str->iso2022.register_left);
+         charset = str->iso2022.charset[reg];
+
+         /* Error checking: */
+         if (! CHARSETP (charset)
+             || str->iso2022.invalid_designated[reg]
+             || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL)
+                 && XCHARSET_CHARS (charset) == 94))
+           /* Mrmph.  We are trying to invoke a register that has no
+              or an invalid charset in it, or trying to add a character
+              outside the range of the charset.  Insert that char literally
+              to preserve it for the output. */
+           {
+             DECODE_OUTPUT_PARTIAL_CHAR (ch);
+             DECODE_ADD_BINARY_CHAR (c, dst);
+           }
+
+         else
+           {
+             /* Things are probably hunky-dorey. */
+
+             /* Fetch reverse charset, maybe. */
+             if (((flags & CODING_STATE_R2L) &&
+                  XCHARSET_DIRECTION (charset) == CHARSET_LEFT_TO_RIGHT)
+                 ||
+                 (!(flags & CODING_STATE_R2L) &&
+                  XCHARSET_DIRECTION (charset) == CHARSET_RIGHT_TO_LEFT))
+               {
+                 Lisp_Object new_charset =
+                   XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
+                 if (!NILP (new_charset))
+                   charset = new_charset;
+               }
+
+#ifndef UTF2000
+             lb = XCHARSET_LEADING_BYTE (charset);
+#endif
+             switch (XCHARSET_REP_BYTES (charset))
+               {
+               case 1: /* ASCII */
+                 DECODE_OUTPUT_PARTIAL_CHAR (ch);
+                 Dynarr_add (dst, c & 0x7F);
+                 break;
+
+               case 2: /* one-byte official */
+                 DECODE_OUTPUT_PARTIAL_CHAR (ch);
+#ifdef UTF2000
+                 DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), dst);
+#else
+                 Dynarr_add (dst, lb);
+                 Dynarr_add (dst, c | 0x80);
+#endif
+                 break;
+
+               case 3: /* one-byte private or two-byte official */
+#ifdef UTF2000
+                 if (XCHARSET_DIMENSION (charset) == 1)
+#else
+                 if (XCHARSET_PRIVATE_P (charset))
+#endif
+                   {
+                     DECODE_OUTPUT_PARTIAL_CHAR (ch);
+#ifdef UTF2000
+                     DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0),
+                                         dst);
+#else
+                     Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1);
+                     Dynarr_add (dst, lb);
+                     Dynarr_add (dst, c | 0x80);
+#endif
+                   }
+                 else
+                   {
+                     if (ch)
+                       {
+#ifdef UTF2000
+                         DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
+                                                       ch & 0x7F,
+                                                       c & 0x7F), dst);
+#else
+                         Dynarr_add (dst, lb);
+                         Dynarr_add (dst, ch | 0x80);
+                         Dynarr_add (dst, c | 0x80);
+#endif
+                         ch = 0;
+                       }
+                     else
+                       ch = c;
+                   }
+                 break;
+
+               default:        /* two-byte private */
+                 if (ch)
+                   {
+#ifdef UTF2000
+                     DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
+                                                   ch & 0x7F,
+                                                   c & 0x7F), dst);
+#else
+                     Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2);
+                     Dynarr_add (dst, lb);
+                     Dynarr_add (dst, ch | 0x80);
+                     Dynarr_add (dst, c | 0x80);
+#endif
+                     ch = 0;
+                   }
+                 else
+                   ch = c;
+               }
+           }
+
+         if (!ch)
+           flags &= CODING_STATE_ISO2022_LOCK;
+       }
+
+    label_continue_loop:;
+    }
+
+  if (flags & CODING_STATE_END)
+    DECODE_OUTPUT_PARTIAL_CHAR (ch);
+
+  str->flags = flags;
+  str->ch    = ch;
+}
+
+
+/***** ISO2022 encoder *****/
+
+/* Designate CHARSET into register REG. */
+
+static void
+iso2022_designate (Lisp_Object charset, unsigned char reg,
+                  struct encoding_stream *str, unsigned_char_dynarr *dst)
+{
+  static CONST char inter94[] = "()*+";
+  static CONST char inter96[] = ",-./";
+  unsigned int type;
+  unsigned char final;
+  Lisp_Object old_charset = str->iso2022.charset[reg];
+
+  str->iso2022.charset[reg] = charset;
+  if (!CHARSETP (charset))
+    /* charset might be an initial nil or t. */
+    return;
+  type = XCHARSET_TYPE (charset);
+  final = XCHARSET_FINAL (charset);
+  if (!str->iso2022.force_charset_on_output[reg] &&
+      CHARSETP (old_charset) &&
+      XCHARSET_TYPE (old_charset) == type &&
+      XCHARSET_FINAL (old_charset) == final)
+    return;
+
+  str->iso2022.force_charset_on_output[reg] = 0;
+
+  {
+    charset_conversion_spec_dynarr *dyn =
+      str->codesys->iso2022.output_conv;
+
+    if (dyn)
+      {
+       int i;
+
+       for (i = 0; i < Dynarr_length (dyn); i++)
+         {
+           struct charset_conversion_spec *spec = Dynarr_atp (dyn, i);
+           if (EQ (charset, spec->from_charset))
+               charset = spec->to_charset;
+         }
+      }
+  }
+
+  Dynarr_add (dst, ISO_CODE_ESC);
+  switch (type)
+    {
+    case CHARSET_TYPE_94:
+      Dynarr_add (dst, inter94[reg]);
+      break;
+    case CHARSET_TYPE_96:
+      Dynarr_add (dst, inter96[reg]);
+      break;
+    case CHARSET_TYPE_94X94:
+      Dynarr_add (dst, '$');
+      if (reg != 0
+         || !(CODING_SYSTEM_ISO2022_SHORT (str->codesys))
+         || final < '@'
+         || final > 'B')
+       Dynarr_add (dst, inter94[reg]);
+      break;
+    case CHARSET_TYPE_96X96:
+      Dynarr_add (dst, '$');
+      Dynarr_add (dst, inter96[reg]);
+      break;
+    }
+  Dynarr_add (dst, final);
+}
+
+static void
+ensure_normal_shift (struct encoding_stream *str, unsigned_char_dynarr *dst)
+{
+  if (str->iso2022.register_left != 0)
+    {
+      Dynarr_add (dst, ISO_CODE_SI);
+      str->iso2022.register_left = 0;
+    }
+}
+
+static void
+ensure_shift_out (struct encoding_stream *str, unsigned_char_dynarr *dst)
+{
+  if (str->iso2022.register_left != 1)
+    {
+      Dynarr_add (dst, ISO_CODE_SO);
+      str->iso2022.register_left = 1;
+    }
+}
+
+void
+char_encode_iso2022 (struct encoding_stream *str, Emchar ch,
+                    unsigned_char_dynarr *dst, unsigned int *flags)
+{
+  unsigned char charmask;
+  Lisp_Coding_System* codesys = str->codesys;
+  eol_type_t eol_type         = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  int i;
+  Lisp_Object charset;
+  int half;
+  unsigned int byte1, byte2;
+
+  if (ch <= 0x7F)
+    {
+      restore_left_to_right_direction (codesys, dst, flags, 0);
+             
+      /* Make sure G0 contains ASCII */
+      if ((ch > ' ' && ch < ISO_CODE_DEL)
+         || !CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys))
+       {
+         ensure_normal_shift (str, dst);
+         iso2022_designate (Vcharset_ascii, 0, str, dst);
+       }
+             
+      /* If necessary, restore everything to the default state
+        at end-of-line */
+      if (ch == '\n' && !(CODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys)))
+       {
+         restore_left_to_right_direction (codesys, dst, flags, 0);
+
+         ensure_normal_shift (str, dst);
+
+         for (i = 0; i < 4; i++)
+           {
+             Lisp_Object initial_charset =
+               CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i);
+             iso2022_designate (initial_charset, i, str, dst);
+           }
+       }
+      if (ch == '\n')
+       {
+         if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+           Dynarr_add (dst, '\r');
+         if (eol_type != EOL_CR)
+           Dynarr_add (dst, ch);
+       }
+      else
+       {
+         if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+             && fit_to_be_escape_quoted (ch))
+           Dynarr_add (dst, ISO_CODE_ESC);
+         Dynarr_add (dst, ch);
+       }
+    }
+  else if ( (0x80 <= ch) && (ch <= 0x9f) )
+    {
+      charmask = (half == 0 ? 0x00 : 0x80);
+         
+      if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+         && fit_to_be_escape_quoted (ch))
+       Dynarr_add (dst, ISO_CODE_ESC);
+      /* you asked for it ... */
+      Dynarr_add (dst, ch);
+    }
+  else
+    {
+      int reg;
+
+      BREAKUP_CHAR (ch, charset, byte1, byte2);
+      ensure_correct_direction (XCHARSET_DIRECTION (charset),
+                               codesys, dst, flags, 0);
+
+      /* Now determine which register to use. */
+      reg = -1;
+      for (i = 0; i < 4; i++)
+       {
+         if (EQ (charset, str->iso2022.charset[i]) ||
+             EQ (charset,
+                 CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)))
+           {
+             reg = i;
+             break;
+           }
+       }
+             
+      if (reg == -1)
+       {
+         if (XCHARSET_GRAPHIC (charset) != 0)
+           {
+             if (!NILP (str->iso2022.charset[1]) &&
+                 (!CODING_SYSTEM_ISO2022_SEVEN (codesys)
+                  || CODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys)))
+               reg = 1;
+             else if (!NILP (str->iso2022.charset[2]))
+               reg = 2;
+             else if (!NILP (str->iso2022.charset[3]))
+               reg = 3;
+             else
+               reg = 0;
+           }
+         else
+           reg = 0;
+       }
+
+      iso2022_designate (charset, reg, str, dst);
+             
+      /* Now invoke that register. */
+      switch (reg)
+       {
+       case 0:
+         ensure_normal_shift (str, dst);
+         half = 0;
+         break;
+       case 1:
+         if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
+           {
+             ensure_shift_out (str, dst);
+             half = 0;
+           }
+         else
+           half = 1;
+         break;
+       case 2:
+         if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+           {
+             Dynarr_add (dst, ISO_CODE_ESC);
+             Dynarr_add (dst, 'N');
+             half = 0;
+           }
+         else
+           {
+             Dynarr_add (dst, ISO_CODE_SS2);
+             half = 1;
+           }
+         break;
+       case 3:
+         if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+           {
+             Dynarr_add (dst, ISO_CODE_ESC);
+             Dynarr_add (dst, 'O');
+             half = 0;
+           }
+         else
+           {
+             Dynarr_add (dst, ISO_CODE_SS3);
+             half = 1;
+           }
+         break;
+       default:
+         abort ();
+       }
+      
+      charmask = (half == 0 ? 0x00 : 0x80);
+      
+      switch (XCHARSET_DIMENSION (charset))
+       {
+       case 1:
+         Dynarr_add (dst, byte1 | charmask);
+         break;
+       case 2:
+         Dynarr_add (dst, byte1 | charmask);
+         Dynarr_add (dst, byte2 | charmask);
+         break;
+       default:
+         abort ();
+       }
+    }
+}
+
+void
+char_finish_iso2022 (struct encoding_stream *str, unsigned_char_dynarr *dst,
+                    unsigned int *flags)
+{
+  Lisp_Coding_System* codesys = str->codesys;
+  int i;
+
+  restore_left_to_right_direction (codesys, dst, flags, 0);
+  ensure_normal_shift (str, dst);
+  for (i = 0; i < 4; i++)
+    {
+      Lisp_Object initial_charset
+       = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i);
+      iso2022_designate (initial_charset, i, str, dst);
+    }
+}
+#endif /* MULE */
+\f
+/************************************************************************/
+/*                     No-conversion methods                            */
+/************************************************************************/
+
+/* This is used when reading in "binary" files -- i.e. files that may
+   contain all 256 possible byte values and that are not to be
+   interpreted as being in any particular decoding. */
+static void
+decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src,
+                            unsigned_char_dynarr *dst, unsigned int n)
+{
+  unsigned char c;
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
+
+  while (n--)
+    {
+      c = *src++;
+
+      DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+      DECODE_ADD_BINARY_CHAR (c, dst);
+    label_continue_loop:;
+    }
+
+  DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+
+  str->flags = flags;
+  str->ch    = ch;
+}
+
+static void
+encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src,
+                            unsigned_char_dynarr *dst, unsigned int n)
+{
+  unsigned char c;
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+#ifdef UTF2000
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#endif
+
+  while (n--)
+    {
+      c = *src++;        
+#ifdef UTF2000
+      switch (char_boundary)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             char_boundary = 1;
+           }
+         else
+           {
+             ch = 0;
+
+             if (c == '\n')
+               {
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               Dynarr_add (dst, c);
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         switch ( str->codesys->fixed.size )
+           {
+           case 1:
+             Dynarr_add (dst, ch & 0xff);
+             break;
+           case 2:
+             Dynarr_add (dst, (ch >> 8) & 0xff);
+             Dynarr_add (dst,  ch       & 0xff);
+             break;
+           case 3:
+             Dynarr_add (dst, (ch >> 16) & 0xff);
+             Dynarr_add (dst, (ch >>  8) & 0xff);
+             Dynarr_add (dst,  ch        & 0xff);
+             break;
+           case 4:
+             Dynarr_add (dst, (ch >> 24) & 0xff);
+             Dynarr_add (dst, (ch >> 16) & 0xff);
+             Dynarr_add (dst, (ch >>  8) & 0xff);
+             Dynarr_add (dst,  ch        & 0xff);
+             break;
+           default:
+             fprintf(stderr, "It seems %d bytes stream.\n",
+                     str->codesys->fixed.size);
+             abort ();
+           }
+         char_boundary = 0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         char_boundary--;
+       }
+#else /* not UTF2000 */
+      if (c == '\n')
+       {
+         if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+           Dynarr_add (dst, '\r');
+         if (eol_type != EOL_CR)
+           Dynarr_add (dst, '\n');
+         ch = 0;
+       }
+      else if (BYTE_ASCII_P (c))
+       {
+         assert (ch == 0);
+         Dynarr_add (dst, c);
+       }
+      else if (BUFBYTE_LEADING_BYTE_P (c))
+       {
+         assert (ch == 0);
+         if (c == LEADING_BYTE_LATIN_ISO8859_1 ||
+             c == LEADING_BYTE_CONTROL_1)
+           ch = c;
+         else
+           Dynarr_add (dst, '~'); /* untranslatable character */
+       }
+      else
+       {
+         if (ch == LEADING_BYTE_LATIN_ISO8859_1)
+           Dynarr_add (dst, c);
+         else if (ch == LEADING_BYTE_CONTROL_1)
+           {
+             assert (c < 0xC0);
+             Dynarr_add (dst, c - 0x20);
+           }
+         /* else it should be the second or third byte of an
+            untranslatable character, so ignore it */
+         ch = 0;
+       }
+#endif /* not UTF2000 */
+    }
+
+  str->flags = flags;
+  str->ch    = ch;
+#ifdef UTF2000
+  str->iso2022.current_char_boundary = char_boundary;
+#endif
+}
+
+\f
+/************************************************************************/
+/*                   Simple internal/external functions                 */
+/************************************************************************/
+
+static Extbyte_dynarr *conversion_out_dynarr;
+static Bufbyte_dynarr *conversion_in_dynarr;
+
+/* Determine coding system from coding format */
+
+/* #### not correct for all values of `fmt'! */
+static Lisp_Object
+external_data_format_to_coding_system (enum external_data_format fmt)
+{
+  switch (fmt)
+    {
+    case FORMAT_FILENAME:
+    case FORMAT_TERMINAL:
+      if (EQ (Vfile_name_coding_system, Qnil) ||
+         EQ (Vfile_name_coding_system, Qbinary))
+       return Qnil;
+      else
+       return Fget_coding_system (Vfile_name_coding_system);
+#ifdef MULE
+    case FORMAT_CTEXT:
+      return Fget_coding_system (Qctext);
+#endif
+    default:
+      return Qnil;
+    }
+}
+
+Extbyte *
+convert_to_external_format (CONST Bufbyte *ptr,
+                           Bytecount len,
+                           Extcount *len_out,
+                           enum external_data_format fmt)
+{
+  Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
+
+  if (!conversion_out_dynarr)
+    conversion_out_dynarr = Dynarr_new (Extbyte);
+  else
+    Dynarr_reset (conversion_out_dynarr);
+
+  if (NILP (coding_system))
+    {
+      CONST Bufbyte *end = ptr + len;
+
+      for (; ptr < end;)
+        {
+#ifdef UTF2000
+          Bufbyte c =
+           (*ptr < 0xc0) ? *ptr :
+           ((*ptr & 0x1f) << 6) | (*(ptr+1) & 0x3f);
+#else
+          Bufbyte c =
+            (BYTE_ASCII_P (*ptr))                 ? *ptr :
+            (*ptr == LEADING_BYTE_CONTROL_1)      ? (*(ptr+1) - 0x20) :
+            (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) :
+            '~';
+#endif
+          Dynarr_add (conversion_out_dynarr, (Extbyte) c);
+          INC_CHARPTR (ptr);
+        }
+
+#ifdef ERROR_CHECK_BUFPOS
+      assert (ptr == end);
+#endif
+    }
+  else
+    {
+      Lisp_Object instream, outstream, da_outstream;
+      Lstream *istr, *ostr;
+      struct gcpro gcpro1, gcpro2, gcpro3;
+      char tempbuf[1024]; /* some random amount */
+
+      instream = make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
+      da_outstream = make_dynarr_output_stream
+        ((unsigned_char_dynarr *) conversion_out_dynarr);
+      outstream =
+        make_encoding_output_stream (XLSTREAM (da_outstream), coding_system);
+      istr = XLSTREAM (instream);
+      ostr = XLSTREAM (outstream);
+      GCPRO3 (instream, outstream, da_outstream);
+      while (1)
+        {
+          int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
+          if (!size_in_bytes)
+            break;
+          Lstream_write (ostr, tempbuf, size_in_bytes);
+        }
+      Lstream_close (istr);
+      Lstream_close (ostr);
+      UNGCPRO;
+      Lstream_delete (istr);
+      Lstream_delete (ostr);
+      Lstream_delete (XLSTREAM (da_outstream));
+    }
+
+  *len_out = Dynarr_length (conversion_out_dynarr);
+  Dynarr_add (conversion_out_dynarr, 0); /* remember to zero-terminate! */
+  return Dynarr_atp (conversion_out_dynarr, 0);
+}
+
+Bufbyte *
+convert_from_external_format (CONST Extbyte *ptr,
+                             Extcount len,
+                             Bytecount *len_out,
+                             enum external_data_format fmt)
+{
+  Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
+
+  if (!conversion_in_dynarr)
+    conversion_in_dynarr = Dynarr_new (Bufbyte);
+  else
+    Dynarr_reset (conversion_in_dynarr);
+
+  if (NILP (coding_system))
+    {
+      CONST Extbyte *end = ptr + len;
+      for (; ptr < end; ptr++)
+        {
+          Extbyte c = *ptr;
+          DECODE_ADD_BINARY_CHAR (c, conversion_in_dynarr);
+        }
+    }
+  else
+    {
+      Lisp_Object instream, outstream, da_outstream;
+      Lstream *istr, *ostr;
+      struct gcpro gcpro1, gcpro2, gcpro3;
+      char tempbuf[1024]; /* some random amount */
+
+      instream = make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
+      da_outstream = make_dynarr_output_stream
+        ((unsigned_char_dynarr *) conversion_in_dynarr);
+      outstream =
+        make_decoding_output_stream (XLSTREAM (da_outstream), coding_system);
+      istr = XLSTREAM (instream);
+      ostr = XLSTREAM (outstream);
+      GCPRO3 (instream, outstream, da_outstream);
+      while (1)
+        {
+          int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
+          if (!size_in_bytes)
+            break;
+          Lstream_write (ostr, tempbuf, size_in_bytes);
+        }
+      Lstream_close (istr);
+      Lstream_close (ostr);
+      UNGCPRO;
+      Lstream_delete (istr);
+      Lstream_delete (ostr);
+      Lstream_delete (XLSTREAM (da_outstream));
+    }
+
+  *len_out = Dynarr_length (conversion_in_dynarr);
+  Dynarr_add (conversion_in_dynarr, 0); /* remember to zero-terminate! */
+  return Dynarr_atp (conversion_in_dynarr, 0);
+}
+
+\f
+/************************************************************************/
+/*                             Initialization                           */
+/************************************************************************/
+
+void
+syms_of_file_coding (void)
+{
+  defsymbol (&Qbuffer_file_coding_system, "buffer-file-coding-system");
+  deferror (&Qcoding_system_error, "coding-system-error",
+           "Coding-system error", Qio_error);
+
+  DEFSUBR (Fcoding_system_p);
+  DEFSUBR (Ffind_coding_system);
+  DEFSUBR (Fget_coding_system);
+  DEFSUBR (Fcoding_system_list);
+  DEFSUBR (Fcoding_system_name);
+  DEFSUBR (Fmake_coding_system);
+  DEFSUBR (Fcopy_coding_system);
+  DEFSUBR (Fdefine_coding_system_alias);
+  DEFSUBR (Fsubsidiary_coding_system);
+
+  DEFSUBR (Fcoding_system_type);
+  DEFSUBR (Fcoding_system_doc_string);
+#ifdef MULE
+  DEFSUBR (Fcoding_system_charset);
+#endif
+  DEFSUBR (Fcoding_system_property);
+
+  DEFSUBR (Fcoding_category_list);
+  DEFSUBR (Fset_coding_priority_list);
+  DEFSUBR (Fcoding_priority_list);
+  DEFSUBR (Fset_coding_category_system);
+  DEFSUBR (Fcoding_category_system);
+
+  DEFSUBR (Fdetect_coding_region);
+  DEFSUBR (Fdecode_coding_region);
+  DEFSUBR (Fencode_coding_region);
+#ifdef MULE
+  DEFSUBR (Fdecode_shift_jis_char);
+  DEFSUBR (Fencode_shift_jis_char);
+  DEFSUBR (Fdecode_big5_char);
+  DEFSUBR (Fencode_big5_char);
+  DEFSUBR (Fset_ucs_char);
+  DEFSUBR (Fucs_char);
+  DEFSUBR (Fset_char_ucs);
+  DEFSUBR (Fchar_ucs);
+#endif /* MULE */
+  defsymbol (&Qcoding_system_p, "coding-system-p");
+  defsymbol (&Qno_conversion, "no-conversion");
+  defsymbol (&Qraw_text, "raw-text");
+#ifdef MULE
+  defsymbol (&Qbig5, "big5");
+  defsymbol (&Qshift_jis, "shift-jis");
+  defsymbol (&Qucs4, "ucs-4");
+  defsymbol (&Qutf8, "utf-8");
+  defsymbol (&Qccl, "ccl");
+  defsymbol (&Qiso2022, "iso2022");
+#endif /* MULE */
+  defsymbol (&Qmnemonic, "mnemonic");
+  defsymbol (&Qeol_type, "eol-type");
+  defsymbol (&Qpost_read_conversion, "post-read-conversion");
+  defsymbol (&Qpre_write_conversion, "pre-write-conversion");
+
+  defsymbol (&Qcr, "cr");
+  defsymbol (&Qlf, "lf");
+  defsymbol (&Qcrlf, "crlf");
+  defsymbol (&Qeol_cr, "eol-cr");
+  defsymbol (&Qeol_lf, "eol-lf");
+  defsymbol (&Qeol_crlf, "eol-crlf");
+#ifdef MULE
+  defsymbol (&Qcharset_g0, "charset-g0");
+  defsymbol (&Qcharset_g1, "charset-g1");
+  defsymbol (&Qcharset_g2, "charset-g2");
+  defsymbol (&Qcharset_g3, "charset-g3");
+  defsymbol (&Qforce_g0_on_output, "force-g0-on-output");
+  defsymbol (&Qforce_g1_on_output, "force-g1-on-output");
+  defsymbol (&Qforce_g2_on_output, "force-g2-on-output");
+  defsymbol (&Qforce_g3_on_output, "force-g3-on-output");
+  defsymbol (&Qno_iso6429, "no-iso6429");
+  defsymbol (&Qinput_charset_conversion, "input-charset-conversion");
+  defsymbol (&Qoutput_charset_conversion, "output-charset-conversion");
+
+  defsymbol (&Qshort, "short");
+  defsymbol (&Qno_ascii_eol, "no-ascii-eol");
+  defsymbol (&Qno_ascii_cntl, "no-ascii-cntl");
+  defsymbol (&Qseven, "seven");
+  defsymbol (&Qlock_shift, "lock-shift");
+  defsymbol (&Qescape_quoted, "escape-quoted");
+#endif /* MULE */
+  defsymbol (&Qencode, "encode");
+  defsymbol (&Qdecode, "decode");
+
+#ifdef MULE
+  defsymbol (&Qctext, "ctext");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_SHIFT_JIS],
+            "shift-jis");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5],
+            "big5");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_UCS4],
+            "ucs-4");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_UTF8],
+            "utf-8");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_7],
+            "iso-7");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_DESIGNATE],
+            "iso-8-designate");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_1],
+            "iso-8-1");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_2],
+            "iso-8-2");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_LOCK_SHIFT],
+            "iso-lock-shift");
+#endif /* MULE */
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_NO_CONVERSION],
+            "no-conversion");
+}
+
+void
+lstream_type_create_file_coding (void)
+{
+  LSTREAM_HAS_METHOD (decoding, reader);
+  LSTREAM_HAS_METHOD (decoding, writer);
+  LSTREAM_HAS_METHOD (decoding, rewinder);
+  LSTREAM_HAS_METHOD (decoding, seekable_p);
+  LSTREAM_HAS_METHOD (decoding, flusher);
+  LSTREAM_HAS_METHOD (decoding, closer);
+  LSTREAM_HAS_METHOD (decoding, marker);
+
+  LSTREAM_HAS_METHOD (encoding, reader);
+  LSTREAM_HAS_METHOD (encoding, writer);
+  LSTREAM_HAS_METHOD (encoding, rewinder);
+  LSTREAM_HAS_METHOD (encoding, seekable_p);
+  LSTREAM_HAS_METHOD (encoding, flusher);
+  LSTREAM_HAS_METHOD (encoding, closer);
+  LSTREAM_HAS_METHOD (encoding, marker);
+}
+
+void
+vars_of_file_coding (void)
+{
+  int i;
+
+  /* Initialize to something reasonable ... */
+  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+    {
+      coding_category_system[i] = Qnil;
+      coding_category_by_priority[i] = i;
+    }
+
+  Fprovide (intern ("file-coding"));
+
+  DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /*
+Coding system used for TTY keyboard input.
+Not used under a windowing system.
+*/ );
+  Vkeyboard_coding_system = Qnil;
+
+  DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /*
+Coding system used for TTY display output.
+Not used under a windowing system.
+*/ );
+  Vterminal_coding_system = Qnil;
+
+  DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /*
+Overriding coding system used when writing a file or process.
+You should *bind* this, not set it.  If this is non-nil, it specifies
+the coding system that will be used when a file or process is read
+in, and overrides `buffer-file-coding-system-for-read',
+`insert-file-contents-pre-hook', etc.  Use those variables instead of
+this one for permanent changes to the environment.
+*/ );
+  Vcoding_system_for_read = Qnil;
+
+  DEFVAR_LISP ("coding-system-for-write",
+               &Vcoding_system_for_write /*
+Overriding coding system used when writing a file or process.
+You should *bind* this, not set it.  If this is non-nil, it specifies
+the coding system that will be used when a file or process is wrote
+in, and overrides `buffer-file-coding-system',
+`write-region-pre-hook', etc.  Use those variables instead of this one
+for permanent changes to the environment.
+*/ );
+  Vcoding_system_for_write = Qnil;
+
+  DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /*
+Coding system used to convert pathnames when accessing files.
+*/ );
+  Vfile_name_coding_system = Qnil;
+
+  DEFVAR_BOOL ("enable-multibyte-characters", &enable_multibyte_characters /*
+Non-nil means the buffer contents are regarded as multi-byte form
+of characters, not a binary code.  This affects the display, file I/O,
+and behaviors of various editing commands.
+
+Setting this to nil does not do anything.
+*/ );
+  enable_multibyte_characters = 1;
+}
+
+void
+complex_vars_of_file_coding (void)
+{
+  staticpro (&Vcoding_system_hash_table);
+  Vcoding_system_hash_table =
+    make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
+
+  the_codesys_prop_dynarr = Dynarr_new (codesys_prop);
+
+#define DEFINE_CODESYS_PROP(Prop_Type, Sym) do \
+{                                              \
+  struct codesys_prop csp;                     \
+  csp.sym = (Sym);                             \
+  csp.prop_type = (Prop_Type);                 \
+  Dynarr_add (the_codesys_prop_dynarr, csp);   \
+} while (0)
+
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qmnemonic);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_type);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_cr);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_crlf);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_lf);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qpost_read_conversion);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qpre_write_conversion);
+#ifdef MULE
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g0);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g1);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g2);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g3);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g0_on_output);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g1_on_output);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g2_on_output);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g3_on_output);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qshort);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qno_ascii_eol);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qno_ascii_cntl);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qseven);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qlock_shift);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qno_iso6429);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qescape_quoted);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qinput_charset_conversion);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qoutput_charset_conversion);
+
+  DEFINE_CODESYS_PROP (CODESYS_PROP_CCL,     Qencode);
+  DEFINE_CODESYS_PROP (CODESYS_PROP_CCL,     Qdecode);
+#endif /* MULE */
+  /* Need to create this here or we're really screwed. */
+  Fmake_coding_system
+    (Qraw_text, Qno_conversion,
+     build_string ("Raw text, which means it converts only line-break-codes."),
+     list2 (Qmnemonic, build_string ("Raw")));
+
+  Fmake_coding_system
+    (Qbinary, Qno_conversion,
+     build_string ("Binary, which means it does not convert anything."),
+     list4 (Qeol_type, Qlf,
+           Qmnemonic, build_string ("Binary")));
+
+#ifdef UTF2000
+  Fmake_coding_system
+    (Qutf8, Qutf8,
+     build_string ("Coding-system of ISO/IEC 10646 UTF-8."),
+     list2 (Qmnemonic, build_string ("UTF8")));
+#endif
+
+  Fdefine_coding_system_alias (Qno_conversion, Qraw_text);
+
+  /* Need this for bootstrapping */
+  coding_category_system[CODING_CATEGORY_NO_CONVERSION] =
+    Fget_coding_system (Qraw_text);
+
+#ifdef UTF2000
+  coding_category_system[CODING_CATEGORY_UTF8]
+   = Fget_coding_system (Qutf8);
+#endif
+
+#ifdef MULE
+  {
+    unsigned int i;
+
+    for (i = 0; i < 65536; i++)
+      ucs_to_mule_table[i] = Qnil;
+  }
+  staticpro (&mule_to_ucs_table);
+  mule_to_ucs_table = Fmake_char_table(Qgeneric);
+#endif /* MULE */
+}
author	tomo <tomo>
	Thu, 9 Sep 1999 06:08:42 +0000 (06:08 +0000)
committer	tomo <tomo>
	Thu, 9 Sep 1999 06:08:42 +0000 (06:08 +0000)